From daa49c059db6f0ee2df91eb2ad8def5c2dfca88c Mon Sep 17 00:00:00 2001 From: Curtis Chin Jen Sem Date: Sat, 29 Nov 2025 21:50:53 +0100 Subject: [PATCH 1/9] Use prepared statements when indexing --- hiedb.cabal | 1 + src/HieDb/Create.hs | 85 ++++++++++++++++++++++++++++----------------- src/HieDb/Run.hs | 6 ++-- src/HieDb/Types.hs | 40 ++++++++++++++++++++- src/HieDb/Utils.hs | 5 ++- 5 files changed, 98 insertions(+), 39 deletions(-) diff --git a/hiedb.cabal b/hiedb.cabal index b7b50d6..cb5cf8e 100644 --- a/hiedb.cabal +++ b/hiedb.cabal @@ -66,6 +66,7 @@ library , containers , filepath , directory + , direct-sqlite , mtl , sqlite-simple , hie-compat ^>= 0.3 diff --git a/src/HieDb/Create.hs b/src/HieDb/Create.hs index eb91a48..252cb9a 100644 --- a/src/HieDb/Create.hs +++ b/src/HieDb/Create.hs @@ -17,6 +17,7 @@ import GHC import Control.Exception import Control.Monad +import Control.Monad.Cont import Control.Monad.IO.Class import Control.Monad.State.Strict (evalStateT) @@ -61,23 +62,52 @@ checkVersion k db@(getConn -> conn) = do else throwIO $ IncompatibleSchemaVersion dB_VERSION ver +withHieDb' :: FilePath -> ContT a IO HieDb +withHieDb' fp = do + connection <- ContT $ withConnection fp + liftIO $ setupHieDb connection + let createStatement t = fmap StatementFor (ContT (withStatement connection t)) + deleteInternalTables = do + deletions + <- traverse createStatement + [ "DELETE FROM refs WHERE hieFile = ?" + , "DELETE FROM decls WHERE hieFile = ?" + , "DELETE FROM defs WHERE hieFile = ?" + , "DELETE FROM typerefs WHERE hieFile = ?" + , "DELETE FROM mods WHERE hieFile = ?" + , "DELETE FROM exports WHERE hieFile = ?" + ] + pure $ \fp -> mapM_ (`runStatementFor_` fp) deletions + HieDb connection + <$> createStatement "INSERT INTO mods VALUES (?,?,?,?,?,?,?)" + <*> createStatement "INSERT INTO refs VALUES (?,?,?,?,?,?,?,?,?)" + <*> createStatement "INSERT INTO decls VALUES (?,?,?,?,?,?,?)" + <*> createStatement "INSERT INTO imports VALUES (?,?,?,?,?,?)" + <*> createStatement "INSERT INTO defs VALUES (?,?,?,?,?,?)" + <*> createStatement "INSERT INTO exports VALUES (?,?,?,?,?,?,?,?)" + <*> createStatement "INSERT INTO typerefs VALUES (?,?,?,?,?,?,?)" + <*> createStatement "INSERT INTO typenames(name,mod,unit) VALUES (?,?,?)" + <*> createStatement "SELECT id FROM typenames WHERE name = ? AND mod = ? AND unit = ?" + <*> deleteInternalTables + {-| Given path to @.hiedb@ file, constructs 'HieDb' and passes it to given function. -} withHieDb :: FilePath -> (HieDb -> IO a) -> IO a -withHieDb fp f = withConnection fp (checkVersion f . HieDb) +withHieDb fp f = runContT (withHieDb' fp) $ \hiedb -> do + checkVersion f hiedb {-| Given GHC LibDir and path to @.hiedb@ file, constructs DynFlags (required for printing info from @.hie@ files) and 'HieDb' and passes them to given function. -} withHieDbAndFlags :: LibDir -> FilePath -> (DynFlags -> HieDb -> IO a) -> IO a -withHieDbAndFlags libdir fp f = do +withHieDbAndFlags libdir fp f = runContT (withHieDb' fp) $ \hiedb -> do dynFlags <- dynFlagsForPrinting libdir - withConnection fp (checkVersion (f dynFlags) . HieDb) + f dynFlags hiedb {-| Initialize database schema for given 'HieDb'. -} -initConn :: HieDb -> IO () -initConn (getConn -> conn) = do +setupHieDb :: Connection -> IO () +setupHieDb conn = do execute_ conn "PRAGMA busy_timeout = 500;" execute_ conn "PRAGMA journal_mode = WAL;" execute_ conn "PRAGMA foreign_keys = ON;" @@ -188,7 +218,7 @@ Returns an Array mapping 'TypeIndex' to database ID assigned to the corresponding record in DB. -} addArr :: HieDb -> A.Array TypeIndex HieTypeFlat -> IO (A.Array TypeIndex (Maybe Int64)) -addArr (getConn -> conn) arr = do +addArr hiedb arr = do forM arr $ \case HTyVarTy n -> addName n HTyConApp tc _ -> addName (ifaceTyConName tc) @@ -201,8 +231,8 @@ addArr (getConn -> conn) arr = do let occ = nameOccName n mod = moduleName m uid = moduleUnit m - execute conn "INSERT INTO typenames(name,mod,unit) VALUES (?,?,?)" (occ,mod,uid) - fmap fromOnly . listToMaybe <$> query conn "SELECT id FROM typenames WHERE name = ? AND mod = ? AND unit = ?" (occ,mod,uid) + runStatementFor_ (insertTypenamesStatement hiedb) (occ,mod,uid) + fmap fromOnly <$> runStatementFor (queryTypenamesStatement hiedb) (occ,mod,uid) {-| Add references to types from given @.hie@ file to DB. -} addTypeRefs @@ -308,7 +338,7 @@ addRefsFromLoadedInternal addRefsFromLoadedInternal db@(getConn -> conn) path sourceFile hash skipOptions hf = liftIO $ withTransaction conn $ do - deleteInternalTables conn path + deleteInternalTablesStatement db (Only path) addRefsFromLoaded_unsafe db path sourceFile hash skipOptions hf -- | Like 'addRefsFromLoaded' but without: @@ -317,8 +347,7 @@ addRefsFromLoadedInternal -- -- Mostly useful to index a new database from scratch as fast as possible addRefsFromLoaded_unsafe - :: MonadIO m - => HieDb -- ^ HieDb into which we're adding the file + :: HieDb -- ^ HieDb into which we're adding the file -> FilePath -- ^ Path to @.hie@ file -> SourceFile -- ^ Path to .hs file from which @.hie@ file was created -- Also tells us if this is a real source file? @@ -326,9 +355,9 @@ addRefsFromLoaded_unsafe -> Fingerprint -- ^ The hash of the @.hie@ file -> SkipOptions -- ^ Skip indexing certain tables -> HieFile -- ^ Data loaded from the @.hie@ file - -> m () + -> IO () addRefsFromLoaded_unsafe - db@(getConn -> conn) path sourceFile hash skipOptions hf = liftIO $ do + db path sourceFile hash skipOptions hf = do let isBoot = "boot" `isSuffixOf` path mod = moduleName smod @@ -352,25 +381,25 @@ addRefsFromLoaded_unsafe let sourceOnlyNodeInfo = SourcedNodeInfo $ M.delete originToDrop sniMap in Node sourceOnlyNodeInfo sp (map (dropNodeInfos originToDrop) children) - execute conn "INSERT INTO mods VALUES (?,?,?,?,?,?,?)" modrow + runStatementFor_ (insertModsStatement db) modrow let AstInfo refsSrc declsSrc importsSrc = genAstInfo path smod SourceInfo refmapSourceOnly AstInfo refsGen declsGen importsGen = genAstInfo path smod GeneratedInfo refmapGeneratedOnly unless (skipRefs skipOptions) $ - executeMany conn "INSERT INTO refs VALUES (?,?,?,?,?,?,?,?,?)" (refsSrc <> refsGen) + mapM_ (runStatementFor_ (insertRefsStatement db)) (refsSrc <> refsGen) unless (skipDecls skipOptions) $ - executeMany conn "INSERT INTO decls VALUES (?,?,?,?,?,?,?)" (declsSrc <> declsGen) + mapM_ (runStatementFor_ (insertDeclsStatement db)) (declsSrc <> declsGen) unless (skipImports skipOptions) $ - executeMany conn "INSERT INTO imports VALUES (?,?,?,?,?,?)" (importsSrc <> importsGen) + mapM_ (runStatementFor_ (insertImportsStatement db)) (importsSrc <> importsGen) let defs = genDefRow path smod refmapAll unless (skipDefs skipOptions) $ - executeMany conn "INSERT INTO defs VALUES (?,?,?,?,?,?)" defs + mapM_ (runStatementFor_ (insertDefsStatement db)) defs let exports = generateExports path $ hie_exports hf unless (skipExports skipOptions) $ - executeMany conn "INSERT INTO exports VALUES (?,?,?,?,?,?,?,?)" exports + mapM_ (runStatementFor_ (insertExportsStatement db)) exports unless (skipTypes skipOptions) $ do ixs <- addArr db (hie_types hf) @@ -400,21 +429,21 @@ removeDependencySrcFiles (getConn -> conn) = {-| Delete all occurrences of given @.hie@ file from the database -} deleteFileFromIndex :: HieDb -> FilePath -> IO () -deleteFileFromIndex (getConn -> conn) path = withTransaction conn $ do - deleteInternalTables conn path +deleteFileFromIndex db@(getConn -> conn) path = withTransaction conn $ do + deleteInternalTablesStatement db (Only path) {-| Delete all entries associated with modules for which the 'modInfoSrcFile' doesn't exist on the disk. Doesn't delete it if there is no associated 'modInfoSrcFile' -} deleteMissingRealFiles :: HieDb -> IO () -deleteMissingRealFiles (getConn -> conn) = withTransaction conn $ do +deleteMissingRealFiles db@(getConn -> conn) = withTransaction conn $ do missing_file_keys <- fold_ conn "SELECT hieFile,hs_src FROM mods WHERE hs_src IS NOT NULL AND is_real" [] $ \acc (path,src) -> do exists <- doesFileExist src pure $ if exists then acc else path : acc forM_ missing_file_keys $ \path -> do - deleteInternalTables conn path + deleteInternalTablesStatement db (Only path) {-| Garbage collect typenames with no references - it is a good idea to call this function after a sequence of database updates (inserts or deletes) @@ -422,13 +451,5 @@ this function after a sequence of database updates (inserts or deletes) garbageCollectTypeNames :: HieDb -> IO Int garbageCollectTypeNames (getConn -> conn) = do execute_ conn "DELETE FROM typenames WHERE NOT EXISTS ( SELECT 1 FROM typerefs WHERE typerefs.id = typenames.id LIMIT 1 )" + execute_ conn "PRAGMA optimize;" changes conn - -deleteInternalTables :: Connection -> FilePath -> IO () -deleteInternalTables conn path = do - execute conn "DELETE FROM refs WHERE hieFile = ?" (Only path) - execute conn "DELETE FROM decls WHERE hieFile = ?" (Only path) - execute conn "DELETE FROM defs WHERE hieFile = ?" (Only path) - execute conn "DELETE FROM typerefs WHERE hieFile = ?" (Only path) - execute conn "DELETE FROM mods WHERE hieFile = ?" (Only path) - execute conn "DELETE FROM exports WHERE hieFile = ?" (Only path) diff --git a/src/HieDb/Run.hs b/src/HieDb/Run.hs index 61eb87d..3359b43 100644 --- a/src/HieDb/Run.hs +++ b/src/HieDb/Run.hs @@ -277,7 +277,7 @@ runCommand libdir opts cmd = withHieDbAndFlags libdir (database opts) $ \dynFlag when (trace opts) $ setHieTrace conn (Just $ T.hPutStrLn stderr . ("\n****TRACE: "<>)) when (reindex opts) $ do - initConn conn + setupHieDb (getConn conn) files' <- map hieModuleHieFile <$> getAllIndexedMods conn files <- fmap catMaybes $ forM files' $ \f -> do exists <- doesFileExist f @@ -293,9 +293,9 @@ runCommand libdir opts cmd = withHieDbAndFlags libdir (database opts) $ \dynFlag hPutStrLn stderr $ "Re-indexing " ++ show n ++ " files, deleting " ++ show (n-orig) ++ " files" doIndex conn opts stderr files case cmd of - Init -> initConn conn + Init -> setupHieDb (getConn conn) Index dirs -> do - initConn conn + setupHieDb (getConn conn) files <- concat <$> mapM getHieFilesIn dirs doIndex conn opts stderr files TypeRefs typ mn muid -> do diff --git a/src/HieDb/Types.hs b/src/HieDb/Types.hs index 529ed5a..a4eba61 100644 --- a/src/HieDb/Types.hs +++ b/src/HieDb/Types.hs @@ -7,6 +7,7 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -Wno-orphans #-} +{-# LANGUAGE TypeApplications #-} module HieDb.Types where import Prelude hiding (mod) @@ -31,8 +32,45 @@ import Database.SQLite.Simple.FromField import qualified Text.ParserCombinators.ReadP as R import HieDb.Compat +import qualified Database.SQLite3 as Direct +import Control.Monad (void) + +data HieDb = HieDb + { getConn :: !Connection + , insertModsStatement :: !(StatementFor HieModuleRow) + , insertRefsStatement :: !(StatementFor RefRow) + , insertDeclsStatement :: !(StatementFor DeclRow) + , insertImportsStatement :: !(StatementFor ImportRow) + , insertDefsStatement :: !(StatementFor DefRow) + , insertExportsStatement :: !(StatementFor ExportRow) + , insertTyperefsStatement :: !(StatementFor TypeRef) + , insertTypenamesStatement :: !(StatementFor (OccName, ModuleName, Unit)) + , queryTypenamesStatement :: !(StatementFor (OccName, ModuleName, Unit)) + , deleteInternalTablesStatement :: !(Only FilePath -> IO ()) + } + +newtype StatementFor a = StatementFor Statement + +data NoOutput = NoOutput + +instance FromRow NoOutput where + fromRow = pure NoOutput + +runStatementFor_ :: ToRow a => StatementFor a -> a -> IO () +{-# INLINE runStatementFor_ #-} +runStatementFor_ (StatementFor statement@(Statement s)) params = do + Direct.bind s (toRow params) + *> void (nextRow @NoOutput statement) + *> reset statement + *> Direct.clearBindings s -newtype HieDb = HieDb { getConn :: Connection } +runStatementFor :: (ToRow a, FromRow b) => StatementFor a -> a -> IO (Maybe b) +{-# INLINE runStatementFor #-} +runStatementFor (StatementFor statement@(Statement s)) params = do + Direct.bind s (toRow params) + *> nextRow statement + <* reset statement + <* Direct.clearBindings s data HieDbException = IncompatibleSchemaVersion diff --git a/src/HieDb/Utils.hs b/src/HieDb/Utils.hs index b6d9865..26e295a 100644 --- a/src/HieDb/Utils.hs +++ b/src/HieDb/Utils.hs @@ -35,7 +35,6 @@ import Data.IORef import HieDb.Types import HieDb.Compat -import Database.SQLite.Simple import Control.Monad.State.Strict (StateT, get, put) import qualified Data.IntSet as ISet import qualified Data.IntMap.Strict as IMap @@ -60,7 +59,7 @@ import Control.Concurrent.MVar (readMVar) type TypeIndexing a = StateT (IntMap IntSet) IO a addTypeRef :: HieDb -> FilePath -> A.Array TypeIndex HieTypeFlat -> A.Array TypeIndex (Maybe Int64) -> RealSrcSpan -> TypeIndex -> TypeIndexing () -addTypeRef (getConn -> conn) hf arr ixs sp = go 0 +addTypeRef hiedb hf arr ixs sp = go 0 where sl = srcSpanStartLine sp sc = srcSpanStartCol sp @@ -75,7 +74,7 @@ addTypeRef (getConn -> conn) hf arr ixs sp = go 0 indexed <- get let isTypeIndexed = ISet.member (fromIntegral occ) (IMap.findWithDefault ISet.empty depth indexed) unless isTypeIndexed $ do - liftIO $ execute conn "INSERT INTO typerefs VALUES (?,?,?,?,?,?,?)" ref + liftIO $ runStatementFor_ (insertTyperefsStatement hiedb) ref put $ IMap.alter (\case Nothing -> Just $ ISet.singleton (fromIntegral occ) Just s -> Just $ ISet.insert (fromIntegral occ) s From a101c3bf98e72d193b5960853f87a4ff662e516d Mon Sep 17 00:00:00 2001 From: Curtis Chin Jen Sem Date: Sat, 29 Nov 2025 22:01:02 +0100 Subject: [PATCH 2/9] Set `synchronous = NORMAL` This should be fine for hiedb's purposes. The loss of durability means that a committed but not yet fsync'd transaction, may be rolled back. In hiedb's case, this would be automatically fixed on the follow-up run. --- src/HieDb/Create.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/HieDb/Create.hs b/src/HieDb/Create.hs index 252cb9a..7ae0986 100644 --- a/src/HieDb/Create.hs +++ b/src/HieDb/Create.hs @@ -111,6 +111,7 @@ setupHieDb conn = do execute_ conn "PRAGMA busy_timeout = 500;" execute_ conn "PRAGMA journal_mode = WAL;" execute_ conn "PRAGMA foreign_keys = ON;" + execute_ conn "PRAGMA synchronous = NORMAL;" execute_ conn "PRAGMA defer_foreign_keys = ON;" execute_ conn "CREATE TABLE IF NOT EXISTS mods \ From 5b55fa31ea3c881b1eeb4e790f0c2d2c9f31977a Mon Sep 17 00:00:00 2001 From: Curtis Chin Jen Sem Date: Mon, 1 Dec 2025 23:34:10 +0100 Subject: [PATCH 3/9] Remove redundant `setupHieDb` calls It's already called as part of `withHieDb` that sets up and provides the `HieDb` handle. --- src/HieDb/Run.hs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/HieDb/Run.hs b/src/HieDb/Run.hs index 3359b43..2f4eb7f 100644 --- a/src/HieDb/Run.hs +++ b/src/HieDb/Run.hs @@ -277,7 +277,6 @@ runCommand libdir opts cmd = withHieDbAndFlags libdir (database opts) $ \dynFlag when (trace opts) $ setHieTrace conn (Just $ T.hPutStrLn stderr . ("\n****TRACE: "<>)) when (reindex opts) $ do - setupHieDb (getConn conn) files' <- map hieModuleHieFile <$> getAllIndexedMods conn files <- fmap catMaybes $ forM files' $ \f -> do exists <- doesFileExist f @@ -293,9 +292,8 @@ runCommand libdir opts cmd = withHieDbAndFlags libdir (database opts) $ \dynFlag hPutStrLn stderr $ "Re-indexing " ++ show n ++ " files, deleting " ++ show (n-orig) ++ " files" doIndex conn opts stderr files case cmd of - Init -> setupHieDb (getConn conn) + Init -> pure () Index dirs -> do - setupHieDb (getConn conn) files <- concat <$> mapM getHieFilesIn dirs doIndex conn opts stderr files TypeRefs typ mn muid -> do From 49941f683fba7a3d4f504888450951edce263170 Mon Sep 17 00:00:00 2001 From: Curtis Chin Jen Sem Date: Mon, 1 Dec 2025 23:38:29 +0100 Subject: [PATCH 4/9] Add backwards compatible API-functions --- src/HieDb/Create.hs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/HieDb/Create.hs b/src/HieDb/Create.hs index 7ae0986..a118191 100644 --- a/src/HieDb/Create.hs +++ b/src/HieDb/Create.hs @@ -104,6 +104,12 @@ withHieDbAndFlags libdir fp f = runContT (withHieDb' fp) $ \hiedb -> do dynFlags <- dynFlagsForPrinting libdir f dynFlags hiedb +{-| Initialize database schema for given 'HieDb'. +-} +initConn :: HieDb -> IO () +{-# DEPRECATED initConn "Use setupHieDb instead." #-} +initConn = setupHieDb . getConn + {-| Initialize database schema for given 'HieDb'. -} setupHieDb :: Connection -> IO () @@ -454,3 +460,12 @@ garbageCollectTypeNames (getConn -> conn) = do execute_ conn "DELETE FROM typenames WHERE NOT EXISTS ( SELECT 1 FROM typerefs WHERE typerefs.id = typenames.id LIMIT 1 )" execute_ conn "PRAGMA optimize;" changes conn + +deleteInternalTables :: Connection -> FilePath -> IO () +deleteInternalTables conn path = do + execute conn "DELETE FROM refs WHERE hieFile = ?" (Only path) + execute conn "DELETE FROM decls WHERE hieFile = ?" (Only path) + execute conn "DELETE FROM defs WHERE hieFile = ?" (Only path) + execute conn "DELETE FROM typerefs WHERE hieFile = ?" (Only path) + execute conn "DELETE FROM mods WHERE hieFile = ?" (Only path) + execute conn "DELETE FROM exports WHERE hieFile = ?" (Only path) From e6bea36d913b82c64b9b93d4d8075734025bbcdb Mon Sep 17 00:00:00 2001 From: Curtis Chin Jen Sem Date: Tue, 2 Dec 2025 23:02:26 +0100 Subject: [PATCH 5/9] Use sqlite-simple for binding parameters --- src/HieDb/Types.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/HieDb/Types.hs b/src/HieDb/Types.hs index a4eba61..0f551cb 100644 --- a/src/HieDb/Types.hs +++ b/src/HieDb/Types.hs @@ -59,7 +59,7 @@ instance FromRow NoOutput where runStatementFor_ :: ToRow a => StatementFor a -> a -> IO () {-# INLINE runStatementFor_ #-} runStatementFor_ (StatementFor statement@(Statement s)) params = do - Direct.bind s (toRow params) + bind statement (toRow params) *> void (nextRow @NoOutput statement) *> reset statement *> Direct.clearBindings s @@ -67,7 +67,7 @@ runStatementFor_ (StatementFor statement@(Statement s)) params = do runStatementFor :: (ToRow a, FromRow b) => StatementFor a -> a -> IO (Maybe b) {-# INLINE runStatementFor #-} runStatementFor (StatementFor statement@(Statement s)) params = do - Direct.bind s (toRow params) + bind statement (toRow params) *> nextRow statement <* reset statement <* Direct.clearBindings s From afadeb014b52b9bd0f533cb014e4d2dfe0773dcd Mon Sep 17 00:00:00 2001 From: Curtis Chin Jen Sem Date: Tue, 2 Dec 2025 23:32:41 +0100 Subject: [PATCH 6/9] Remove unnecessary call to lower-level direct-sqlite Benchmarking gives a negligible difference between binding via sqlite-simple, which does additional checks on binding parameters, and direct-sqlite that only calls the underlying sqlite3 function. Considering there is no difference stick to using helpers from the same library. --- hiedb.cabal | 1 - src/HieDb/Types.hs | 17 ++++++----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/hiedb.cabal b/hiedb.cabal index cb5cf8e..b7b50d6 100644 --- a/hiedb.cabal +++ b/hiedb.cabal @@ -66,7 +66,6 @@ library , containers , filepath , directory - , direct-sqlite , mtl , sqlite-simple , hie-compat ^>= 0.3 diff --git a/src/HieDb/Types.hs b/src/HieDb/Types.hs index 0f551cb..c4f37c3 100644 --- a/src/HieDb/Types.hs +++ b/src/HieDb/Types.hs @@ -32,7 +32,6 @@ import Database.SQLite.Simple.FromField import qualified Text.ParserCombinators.ReadP as R import HieDb.Compat -import qualified Database.SQLite3 as Direct import Control.Monad (void) data HieDb = HieDb @@ -58,19 +57,15 @@ instance FromRow NoOutput where runStatementFor_ :: ToRow a => StatementFor a -> a -> IO () {-# INLINE runStatementFor_ #-} -runStatementFor_ (StatementFor statement@(Statement s)) params = do - bind statement (toRow params) - *> void (nextRow @NoOutput statement) - *> reset statement - *> Direct.clearBindings s +runStatementFor_ (StatementFor statement) params = do + withBind statement params $ + void (nextRow @NoOutput statement) runStatementFor :: (ToRow a, FromRow b) => StatementFor a -> a -> IO (Maybe b) {-# INLINE runStatementFor #-} -runStatementFor (StatementFor statement@(Statement s)) params = do - bind statement (toRow params) - *> nextRow statement - <* reset statement - <* Direct.clearBindings s +runStatementFor (StatementFor statement) params = do + withBind statement params $ + nextRow statement data HieDbException = IncompatibleSchemaVersion From 01241ad661a88b8622803f63fa7dfa727d1337b3 Mon Sep 17 00:00:00 2001 From: Curtis Chin Jen Sem Date: Wed, 3 Dec 2025 01:37:06 +0100 Subject: [PATCH 7/9] Remove ContT from top-level type signature --- src/HieDb/Create.hs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/HieDb/Create.hs b/src/HieDb/Create.hs index a118191..a8a450a 100644 --- a/src/HieDb/Create.hs +++ b/src/HieDb/Create.hs @@ -62,9 +62,8 @@ checkVersion k db@(getConn -> conn) = do else throwIO $ IncompatibleSchemaVersion dB_VERSION ver -withHieDb' :: FilePath -> ContT a IO HieDb -withHieDb' fp = do - connection <- ContT $ withConnection fp +withHieDb' :: FilePath -> (HieDb -> IO a) -> IO a +withHieDb' fp act = withConnection fp $ \connection -> do liftIO $ setupHieDb connection let createStatement t = fmap StatementFor (ContT (withStatement connection t)) deleteInternalTables = do @@ -78,7 +77,7 @@ withHieDb' fp = do , "DELETE FROM exports WHERE hieFile = ?" ] pure $ \fp -> mapM_ (`runStatementFor_` fp) deletions - HieDb connection + flip runContT act $ HieDb connection <$> createStatement "INSERT INTO mods VALUES (?,?,?,?,?,?,?)" <*> createStatement "INSERT INTO refs VALUES (?,?,?,?,?,?,?,?,?)" <*> createStatement "INSERT INTO decls VALUES (?,?,?,?,?,?,?)" @@ -92,7 +91,7 @@ withHieDb' fp = do {-| Given path to @.hiedb@ file, constructs 'HieDb' and passes it to given function. -} withHieDb :: FilePath -> (HieDb -> IO a) -> IO a -withHieDb fp f = runContT (withHieDb' fp) $ \hiedb -> do +withHieDb fp f = withHieDb' fp $ \hiedb -> do checkVersion f hiedb {-| Given GHC LibDir and path to @.hiedb@ file, @@ -100,7 +99,7 @@ constructs DynFlags (required for printing info from @.hie@ files) and 'HieDb' and passes them to given function. -} withHieDbAndFlags :: LibDir -> FilePath -> (DynFlags -> HieDb -> IO a) -> IO a -withHieDbAndFlags libdir fp f = runContT (withHieDb' fp) $ \hiedb -> do +withHieDbAndFlags libdir fp f = withHieDb' fp $ \hiedb -> do dynFlags <- dynFlagsForPrinting libdir f dynFlags hiedb From 7ed628e388aacf77ea2f0451d2c156fae0e8a540 Mon Sep 17 00:00:00 2001 From: Curtis Chin Jen Sem Date: Sun, 7 Dec 2025 17:27:44 +0100 Subject: [PATCH 8/9] Address review comments - Adds some needed documentation - Cleans up some helper function names - Splits off statements into its own datatype. --- src/HieDb/Create.hs | 86 +++++++++++++++++++++++++++++---------------- src/HieDb/Types.hs | 50 ++++++++++++++++++++++++-- src/HieDb/Utils.hs | 2 +- 3 files changed, 104 insertions(+), 34 deletions(-) diff --git a/src/HieDb/Create.hs b/src/HieDb/Create.hs index a8a450a..6fbf04e 100644 --- a/src/HieDb/Create.hs +++ b/src/HieDb/Create.hs @@ -62,13 +62,23 @@ checkVersion k db@(getConn -> conn) = do else throwIO $ IncompatibleSchemaVersion dB_VERSION ver -withHieDb' :: FilePath -> (HieDb -> IO a) -> IO a -withHieDb' fp act = withConnection fp $ \connection -> do +-- | Commmon implementation for the @withHieDb*@ functions that +withHieDbImpl :: FilePath -> (HieDb -> IO a) -> IO a +withHieDbImpl fp act = withConnection fp $ \connection -> do liftIO $ setupHieDb connection - let createStatement t = fmap StatementFor (ContT (withStatement connection t)) - deleteInternalTables = do + + -- We manually prepare statements instead of using the machinery in + -- sqlite-simple so we can reuse these across `.hie` files more easily. + -- This was benchmarked to be faster: + -- https://github.com/wz1000/HieDb/pull/86 + withPreparedHieDbStatements connection $ \statements -> do + act $ HieDb connection statements + +withPreparedHieDbStatements :: Connection -> (HieDbStatements -> IO a) -> IO a +withPreparedHieDbStatements connection action = do + let prepareInternalTableDeletion = do deletions - <- traverse createStatement + <- traverse (createStatement connection) [ "DELETE FROM refs WHERE hieFile = ?" , "DELETE FROM decls WHERE hieFile = ?" , "DELETE FROM defs WHERE hieFile = ?" @@ -77,21 +87,25 @@ withHieDb' fp act = withConnection fp $ \connection -> do , "DELETE FROM exports WHERE hieFile = ?" ] pure $ \fp -> mapM_ (`runStatementFor_` fp) deletions - flip runContT act $ HieDb connection - <$> createStatement "INSERT INTO mods VALUES (?,?,?,?,?,?,?)" - <*> createStatement "INSERT INTO refs VALUES (?,?,?,?,?,?,?,?,?)" - <*> createStatement "INSERT INTO decls VALUES (?,?,?,?,?,?,?)" - <*> createStatement "INSERT INTO imports VALUES (?,?,?,?,?,?)" - <*> createStatement "INSERT INTO defs VALUES (?,?,?,?,?,?)" - <*> createStatement "INSERT INTO exports VALUES (?,?,?,?,?,?,?,?)" - <*> createStatement "INSERT INTO typerefs VALUES (?,?,?,?,?,?,?)" - <*> createStatement "INSERT INTO typenames(name,mod,unit) VALUES (?,?,?)" - <*> createStatement "SELECT id FROM typenames WHERE name = ? AND mod = ? AND unit = ?" - <*> deleteInternalTables + + runContT + ( HieDbStatements + <$> createStatement connection "INSERT INTO mods VALUES (?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO refs VALUES (?,?,?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO decls VALUES (?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO imports VALUES (?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO defs VALUES (?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO exports VALUES (?,?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO typerefs VALUES (?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO typenames(name,mod,unit) VALUES (?,?,?)" + <*> createStatement connection "SELECT id FROM typenames WHERE name = ? AND mod = ? AND unit = ?" + <*> prepareInternalTableDeletion + ) + action {-| Given path to @.hiedb@ file, constructs 'HieDb' and passes it to given function. -} withHieDb :: FilePath -> (HieDb -> IO a) -> IO a -withHieDb fp f = withHieDb' fp $ \hiedb -> do +withHieDb fp f = withHieDbImpl fp $ \hiedb -> do checkVersion f hiedb {-| Given GHC LibDir and path to @.hiedb@ file, @@ -99,9 +113,9 @@ constructs DynFlags (required for printing info from @.hie@ files) and 'HieDb' and passes them to given function. -} withHieDbAndFlags :: LibDir -> FilePath -> (DynFlags -> HieDb -> IO a) -> IO a -withHieDbAndFlags libdir fp f = withHieDb' fp $ \hiedb -> do +withHieDbAndFlags libdir fp f = withHieDbImpl fp $ \hiedb -> do dynFlags <- dynFlagsForPrinting libdir - f dynFlags hiedb + checkVersion (f dynFlags) hiedb {-| Initialize database schema for given 'HieDb'. -} @@ -116,6 +130,16 @@ setupHieDb conn = do execute_ conn "PRAGMA busy_timeout = 500;" execute_ conn "PRAGMA journal_mode = WAL;" execute_ conn "PRAGMA foreign_keys = ON;" + + -- The default setting for synchronous is 'FULL'. `FULL` will issue fsync's + -- and wait to ensure that every write is done durably. Switching to `NORMAL` + -- moves this to the checkpoints by the WAL (when using a WAL), losing us + -- durability. Practically this means that committed transaction may be rolled + -- back on system failure. This is fine, as the hiedb file will recover by + -- reindexing. See the following. + -- - https://sqlite.org/pragma.html#pragma_synchronous + -- - https://sqlite.org/wal.html + -- - https://github.com/wz1000/HieDb/pull/86 for benchmarks. execute_ conn "PRAGMA synchronous = NORMAL;" execute_ conn "PRAGMA defer_foreign_keys = ON;" @@ -237,8 +261,8 @@ addArr hiedb arr = do let occ = nameOccName n mod = moduleName m uid = moduleUnit m - runStatementFor_ (insertTypenamesStatement hiedb) (occ,mod,uid) - fmap fromOnly <$> runStatementFor (queryTypenamesStatement hiedb) (occ,mod,uid) + runStatementFor_ (insertTypenamesStatement (preparedStatements hiedb)) (occ,mod,uid) + fmap fromOnly <$> runStatementFor (queryTypenamesStatement (preparedStatements hiedb)) (occ,mod,uid) {-| Add references to types from given @.hie@ file to DB. -} addTypeRefs @@ -344,7 +368,7 @@ addRefsFromLoadedInternal addRefsFromLoadedInternal db@(getConn -> conn) path sourceFile hash skipOptions hf = liftIO $ withTransaction conn $ do - deleteInternalTablesStatement db (Only path) + deleteInternalTablesStatement (preparedStatements db) (Only path) addRefsFromLoaded_unsafe db path sourceFile hash skipOptions hf -- | Like 'addRefsFromLoaded' but without: @@ -387,25 +411,25 @@ addRefsFromLoaded_unsafe let sourceOnlyNodeInfo = SourcedNodeInfo $ M.delete originToDrop sniMap in Node sourceOnlyNodeInfo sp (map (dropNodeInfos originToDrop) children) - runStatementFor_ (insertModsStatement db) modrow + runStatementFor_ (insertModsStatement (preparedStatements db)) modrow let AstInfo refsSrc declsSrc importsSrc = genAstInfo path smod SourceInfo refmapSourceOnly AstInfo refsGen declsGen importsGen = genAstInfo path smod GeneratedInfo refmapGeneratedOnly unless (skipRefs skipOptions) $ - mapM_ (runStatementFor_ (insertRefsStatement db)) (refsSrc <> refsGen) + mapM_ (runStatementFor_ (insertRefsStatement (preparedStatements db))) (refsSrc <> refsGen) unless (skipDecls skipOptions) $ - mapM_ (runStatementFor_ (insertDeclsStatement db)) (declsSrc <> declsGen) + mapM_ (runStatementFor_ (insertDeclsStatement (preparedStatements db))) (declsSrc <> declsGen) unless (skipImports skipOptions) $ - mapM_ (runStatementFor_ (insertImportsStatement db)) (importsSrc <> importsGen) + mapM_ (runStatementFor_ (insertImportsStatement (preparedStatements db))) (importsSrc <> importsGen) let defs = genDefRow path smod refmapAll unless (skipDefs skipOptions) $ - mapM_ (runStatementFor_ (insertDefsStatement db)) defs + mapM_ (runStatementFor_ (insertDefsStatement (preparedStatements db))) defs let exports = generateExports path $ hie_exports hf unless (skipExports skipOptions) $ - mapM_ (runStatementFor_ (insertExportsStatement db)) exports + mapM_ (runStatementFor_ (insertExportsStatement (preparedStatements db))) exports unless (skipTypes skipOptions) $ do ixs <- addArr db (hie_types hf) @@ -436,7 +460,7 @@ removeDependencySrcFiles (getConn -> conn) = {-| Delete all occurrences of given @.hie@ file from the database -} deleteFileFromIndex :: HieDb -> FilePath -> IO () deleteFileFromIndex db@(getConn -> conn) path = withTransaction conn $ do - deleteInternalTablesStatement db (Only path) + deleteInternalTablesStatement (preparedStatements db) (Only path) {-| Delete all entries associated with modules for which the 'modInfoSrcFile' doesn't exist on the disk. @@ -449,7 +473,7 @@ deleteMissingRealFiles db@(getConn -> conn) = withTransaction conn $ do exists <- doesFileExist src pure $ if exists then acc else path : acc forM_ missing_file_keys $ \path -> do - deleteInternalTablesStatement db (Only path) + deleteInternalTablesStatement (preparedStatements db) (Only path) {-| Garbage collect typenames with no references - it is a good idea to call this function after a sequence of database updates (inserts or deletes) @@ -457,6 +481,8 @@ this function after a sequence of database updates (inserts or deletes) garbageCollectTypeNames :: HieDb -> IO Int garbageCollectTypeNames (getConn -> conn) = do execute_ conn "DELETE FROM typenames WHERE NOT EXISTS ( SELECT 1 FROM typerefs WHERE typerefs.id = typenames.id LIMIT 1 )" + -- Tack on a call to optimize that'll vacuum and update any table statistics + -- that might have changed. See https://sqlite.org/pragma.html#pragma_optimize. execute_ conn "PRAGMA optimize;" changes conn diff --git a/src/HieDb/Types.hs b/src/HieDb/Types.hs index c4f37c3..c9f39a0 100644 --- a/src/HieDb/Types.hs +++ b/src/HieDb/Types.hs @@ -17,9 +17,11 @@ import Data.IORef import qualified Data.Text as T import qualified Data.Text.Encoding as T +import Control.Exception +import Control.Monad (void) +import Control.Monad.Cont (ContT(..)) import Control.Monad.IO.Class import Control.Monad.Reader -import Control.Exception import Data.List.NonEmpty (NonEmpty(..)) @@ -32,11 +34,20 @@ import Database.SQLite.Simple.FromField import qualified Text.ParserCombinators.ReadP as R import HieDb.Compat -import Control.Monad (void) data HieDb = HieDb { getConn :: !Connection - , insertModsStatement :: !(StatementFor HieModuleRow) + , preparedStatements :: HieDbStatements + } + +-- | Record of prepared statements that are relevant during the slowest +-- operation, indexing. +-- +-- See benchmarks in https://github.com/wz1000/HieDb/pull/86, where statement +-- preparation results in a ~28% improvement in indexing hie files generated +-- from HLS. +data HieDbStatements = HieDbStatements + { insertModsStatement :: !(StatementFor HieModuleRow) , insertRefsStatement :: !(StatementFor RefRow) , insertDeclsStatement :: !(StatementFor DeclRow) , insertImportsStatement :: !(StatementFor ImportRow) @@ -48,19 +59,52 @@ data HieDb = HieDb , deleteInternalTablesStatement :: !(Only FilePath -> IO ()) } +-- | A type-safe wrapper connecting the query preparation with the datatype the +-- query is intended for. The type variable 'a' should correspond to the +-- datatype that will be bound to the statement that is wrapped. newtype StatementFor a = StatementFor Statement +-- | Equivalent to unit so we can avoid parsing anything when doing insertions. +-- +-- Many of our indexing operations are just simple inserts that don't return +-- results. We can't use the array of @sqlite-simple.execute@ functions as they +-- can't make use of the statements we've prepared. data NoOutput = NoOutput instance FromRow NoOutput where fromRow = pure NoOutput +-- | Helper for preparing multiple statements, each of which has to be +-- bracket-wrapped. Done via ContT, so we can use do/applicative notation. +createStatement :: Connection -> Query -> ContT r IO (StatementFor a) +createStatement connection query = fmap StatementFor (ContT (withStatement connection query)) + +-- | Run a statement that was built for an datatype in mind, ensuring that we do +-- indeed pass that datatype. +-- +-- This function is preferably inlined so it'd get specialized with the datatype +-- encoder. runStatementFor_ :: ToRow a => StatementFor a -> a -> IO () {-# INLINE runStatementFor_ #-} runStatementFor_ (StatementFor statement) params = do withBind statement params $ + -- sqlite-simple doesn't offer the best interface for executing prepared + -- insertions. `nextRow` requires a hint to know what it needs to parse if it + -- encounters content from sqlite. Though this code path isn't hit as our + -- insertions don't return anything, we still need to provide a parsable + -- datatype. Bit of a leaky abstraction, can be cleaned up by using the + -- lower-level `direct-sqlite` instead. void (nextRow @NoOutput statement) +-- | Run a statement that was built for an datatype in mind, ensuring that we do +-- indeed pass that datatype. +-- +-- This function is preferably inlined so it'd get specialized with the datatype +-- encoder. +-- +-- NB: While the input variable acts as a witness and is carried around, the +-- output variable is unconstrained. When using this function, double check that +-- the output type is what you expect it to be. runStatementFor :: (ToRow a, FromRow b) => StatementFor a -> a -> IO (Maybe b) {-# INLINE runStatementFor #-} runStatementFor (StatementFor statement) params = do diff --git a/src/HieDb/Utils.hs b/src/HieDb/Utils.hs index 26e295a..d5661c3 100644 --- a/src/HieDb/Utils.hs +++ b/src/HieDb/Utils.hs @@ -74,7 +74,7 @@ addTypeRef hiedb hf arr ixs sp = go 0 indexed <- get let isTypeIndexed = ISet.member (fromIntegral occ) (IMap.findWithDefault ISet.empty depth indexed) unless isTypeIndexed $ do - liftIO $ runStatementFor_ (insertTyperefsStatement hiedb) ref + liftIO $ runStatementFor_ (insertTyperefsStatement (preparedStatements hiedb)) ref put $ IMap.alter (\case Nothing -> Just $ ISet.singleton (fromIntegral occ) Just s -> Just $ ISet.insert (fromIntegral occ) s From 3272e6f5151345a5624225963e0061ae04f6e8f0 Mon Sep 17 00:00:00 2001 From: Curtis Chin Jen Sem Date: Mon, 22 Dec 2025 13:22:18 +0100 Subject: [PATCH 9/9] Finish sentence that trailed off in comment --- src/HieDb/Create.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/HieDb/Create.hs b/src/HieDb/Create.hs index 6fbf04e..e86cceb 100644 --- a/src/HieDb/Create.hs +++ b/src/HieDb/Create.hs @@ -62,7 +62,9 @@ checkVersion k db@(getConn -> conn) = do else throwIO $ IncompatibleSchemaVersion dB_VERSION ver --- | Commmon implementation for the @withHieDb*@ functions that +-- | Commmon implementation for the @withHieDb*@ functions that prepared the +-- hiedb file, if it didn't already exist. Also prepares any statements that +-- we'll need to do indexing. withHieDbImpl :: FilePath -> (HieDb -> IO a) -> IO a withHieDbImpl fp act = withConnection fp $ \connection -> do liftIO $ setupHieDb connection