Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions src/Database/CrossLinking.hs
Original file line number Diff line number Diff line change
Expand Up @@ -398,16 +398,15 @@ findSupplierInIndexedDBs LinkingContext{..} productName location unit =
if T.null location
then extractBracketedLocation productName
else location
in if null allCandidates
then CrossDBNotLinked NoNameMatch
else
in case allCandidates of
[] -> CrossDBNotLinked NoNameMatch
((_, firstSe) : _) ->
-- Check unit compatibility first
let unitCompatible = filter (\(_, se) -> unitsAreCompatible lcUnitConfig unit (seUnit se)) allCandidates
in if null unitCompatible
then
-- All candidates failed unit check — report the first supplier's unit
let (_, firstSe) = head allCandidates
in CrossDBNotLinked (UnitIncompatible unit (seUnit firstSe))
CrossDBNotLinked (UnitIncompatible unit (seUnit firstSe))
else
-- Score by effective location
let scoredCandidates = map (scoreEntry effectiveLocation) unitCompatible
Expand Down Expand Up @@ -478,9 +477,7 @@ findSupplierAcrossDatabases ::
-- | Unit of the exchange
Text ->
CrossDBLinkResult
findSupplierAcrossDatabases ctx productName location unit =
-- Just delegate to the indexed version
findSupplierInIndexedDBs ctx productName location unit
findSupplierAcrossDatabases = findSupplierInIndexedDBs

{- | Match product names (simplified - just for scoring display)
Actual matching is done via index lookup
Expand Down
5 changes: 3 additions & 2 deletions src/Database/Loader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ import Data.Bits (xor)
import qualified Data.ByteString as BS
import Data.Char (toLower)
import Data.Either (partitionEithers)
import Data.List (group, sort, sortBy, sortOn, unzip7)
import Data.List (sort, sortBy, sortOn, unzip7)
import qualified Data.List.NonEmpty as NE
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import Data.Ord (Down (..))
Expand Down Expand Up @@ -277,7 +278,7 @@ reportUnlinkedSummary summary
printf " ... and %d more activities" remainingCount
where
sortOn' f = sortBy (\a b -> compare (f a) (f b))
nub = map head . group . sort
nub = map NE.head . NE.group . sort

-- | Normalize text for matching: lowercase, strip whitespace, normalize Unicode
normalizeText :: T.Text -> T.Text
Expand Down
42 changes: 23 additions & 19 deletions src/Database/Manager.hs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ import Data.Aeson (FromJSON (..), ToJSON (..), (.:), (.:?), (.=))
import qualified Data.Aeson as A
import Data.Bifunctor (first)
import Data.Char (toLower)
import Data.List (isPrefixOf, nub, sortOn)
import Data.Either (lefts, rights)
import Data.List (isPrefixOf, nub, sortOn, unsnoc)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Maybe (catMaybes, fromMaybe, isJust)
Expand Down Expand Up @@ -1144,13 +1145,13 @@ detectDirectoryFormat path = do
files <- listDirectory path
let extensions = map (map toLower . takeExtension) files
-- Check for different formats (in order of preference)
if elem ".spold" extensions
if ".spold" `elem` extensions
then return FormatSpold
else
if elem ".csv" extensions
if ".csv" `elem` extensions
then return FormatCSV
else
if elem ".xml" extensions
if ".xml" `elem` extensions
then return FormatXML
else return FormatUnknown
else return FormatUnknown
Expand Down Expand Up @@ -2345,10 +2346,10 @@ loadMethodCollectionFromConfig mc = do
else return Nothing
let (xmlErrs, xmlMethods) = partitionEithers xmlResults
(csvErrs, csvOks) = partitionEithers csvParsed
(jsonErrs, jsonMethods) = partitionEithers [r | Just r <- jsonResults]
(jsonErrs, jsonMethods) = partitionEithers (catMaybes jsonResults)
-- Merge: SimaPro CSVs are MethodCollections, tabular CSVs are [Method]
spCollections = [sp | Left sp <- csvOks]
tabularMethods = concat [ms | Right ms <- csvOks]
spCollections = lefts csvOks
tabularMethods = concat (rights csvOks)
allMethods =
xmlMethods
++ tabularMethods
Expand All @@ -2359,21 +2360,22 @@ loadMethodCollectionFromConfig mc = do
allNWSets = concatMap mcNormWeightSets spCollections
collection = MethodCollection allMethods allDamageCats allNWSets []
errs = xmlErrs ++ csvErrs ++ jsonErrs
if null allMethods && not (null errs)
then return $ Left $ "All method files failed to parse: " <> T.pack (head errs)
else do
case (null allMethods, errs) of
(True, firstErr : _) ->
return $ Left $ "All method files failed to parse: " <> T.pack firstErr
_ -> do
let xmlOk = length xmlMethods
csvOk = length csvOks
jsonOk = length jsonMethods
reportProgress Info $ " Parsed " <> show xmlOk <> " XML, " <> show csvOk <> " CSV, " <> show jsonOk <> " JSON file(s)"
when (not (null allDamageCats)) $
unless (null allDamageCats) $
reportProgress Info $
" "
<> show (length allDamageCats)
<> " damage categories, "
<> show (length allNWSets)
<> " normalization-weighting set(s)"
when (not (null errs)) $
unless (null errs) $
reportProgress Warning $
" " <> show (length errs) <> " method file(s) failed to parse"
return $ Right (collection, flowInfo)
Expand Down Expand Up @@ -2897,13 +2899,15 @@ parseGeographiesCSV path = do
| otherwise = case T.splitOn "," line of
[] -> []
[_] -> []
parts ->
let code = T.strip (head parts)
parentsStr = T.strip (last parts)
displayRaw = T.intercalate "," (init (tail parts))
displayName = let d = T.strip displayRaw in if T.null d then code else d
parents = if T.null parentsStr then [] else T.splitOn "|" parentsStr
in [(code, (displayName, parents))]
parts@(codeRaw : _) -> case unsnoc parts of
Nothing -> [] -- unreachable: parts is non-empty by pattern
Just (initPart, parentsRaw) ->
let code = T.strip codeRaw
parentsStr = T.strip parentsRaw
displayRaw = T.intercalate "," (drop 1 initPart)
displayName = let d = T.strip displayRaw in if T.null d then code else d
parents = if T.null parentsStr then [] else T.splitOn "|" parentsStr
in [(code, (displayName, parents))]

-- | Load CSV file content from path.
loadRefDataCSV :: FilePath -> IO (Either Text BL.ByteString)
Expand Down
21 changes: 12 additions & 9 deletions src/Database/Upload.hs
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,9 @@ findDataDirectory dir = do
where
pickByFileCount dirs = do
counts <- mapM (\d -> (,) d <$> countDataFilesIn d) dirs
let sorted = sortOn (Down . snd) counts
return $ fst (head sorted)
case sortOn (Down . snd) counts of
(best : _) -> return (fst best)
[] -> return dir

-- Recursively find the first directory containing a processes/ subdirectory
findILCDRoot d = do
Expand All @@ -451,7 +452,7 @@ findDataDirectory dir = do

-- | Find all directories containing recognized data files under a root.
findAllDataDirectories :: FilePath -> IO [FilePath]
findAllDataDirectories root = go root
findAllDataDirectories = go
where
go dir = do
result <- try @SomeException $ do
Expand Down Expand Up @@ -489,7 +490,7 @@ anyDataFilesIn d = do
else do
let fullPaths = map (d </>) fs
extensions = map (map toLower . takeExtension) fs
if any (== ".spold") extensions
if ".spold" `elem` extensions
then return True
else do
let xmlFiles = [p | p <- fullPaths, map toLower (takeExtension p) == ".xml"]
Expand All @@ -511,11 +512,13 @@ findMethodDirectory dir = do
[one] -> return one
many -> do
counts <- mapM (\d -> (,) d <$> countMethodFilesIn d) many
return $ fst $ head $ sortOn (Down . snd) counts
case sortOn (Down . snd) counts of
(best : _) -> return (fst best)
[] -> return dir

-- | Find all directories containing ILCD method XML files under a root.
findAllMethodDirectories :: FilePath -> IO [FilePath]
findAllMethodDirectories root = go root
findAllMethodDirectories = go
where
go dir = do
hasMethod <- anyMethodFilesIn dir
Expand Down Expand Up @@ -612,9 +615,9 @@ detectDatabaseFormat path = do
else do
fs <- listDirectoryRecursive path
let extensions = map (map toLower . takeExtension) fs
let hasSpold = any (== ".spold") extensions
hasXml = any (== ".xml") extensions
hasCsv = any (== ".csv") extensions
let hasSpold = ".spold" `elem` extensions
hasXml = ".xml" `elem` extensions
hasCsv = ".csv" `elem` extensions
if hasSpold
then return EcoSpold2
else
Expand Down
Loading
Loading