diff --git a/src/HieDb/Create.hs b/src/HieDb/Create.hs index eb91a48..e86cceb 100644 --- a/src/HieDb/Create.hs +++ b/src/HieDb/Create.hs @@ -17,6 +17,7 @@ import GHC import Control.Exception import Control.Monad +import Control.Monad.Cont import Control.Monad.IO.Class import Control.Monad.State.Strict (evalStateT) @@ -61,26 +62,87 @@ checkVersion k db@(getConn -> conn) = do else throwIO $ IncompatibleSchemaVersion dB_VERSION ver +-- | Commmon implementation for the @withHieDb*@ functions that prepared the +-- hiedb file, if it didn't already exist. Also prepares any statements that +-- we'll need to do indexing. +withHieDbImpl :: FilePath -> (HieDb -> IO a) -> IO a +withHieDbImpl fp act = withConnection fp $ \connection -> do + liftIO $ setupHieDb connection + + -- We manually prepare statements instead of using the machinery in + -- sqlite-simple so we can reuse these across `.hie` files more easily. + -- This was benchmarked to be faster: + -- https://github.com/wz1000/HieDb/pull/86 + withPreparedHieDbStatements connection $ \statements -> do + act $ HieDb connection statements + +withPreparedHieDbStatements :: Connection -> (HieDbStatements -> IO a) -> IO a +withPreparedHieDbStatements connection action = do + let prepareInternalTableDeletion = do + deletions + <- traverse (createStatement connection) + [ "DELETE FROM refs WHERE hieFile = ?" + , "DELETE FROM decls WHERE hieFile = ?" + , "DELETE FROM defs WHERE hieFile = ?" + , "DELETE FROM typerefs WHERE hieFile = ?" + , "DELETE FROM mods WHERE hieFile = ?" + , "DELETE FROM exports WHERE hieFile = ?" + ] + pure $ \fp -> mapM_ (`runStatementFor_` fp) deletions + + runContT + ( HieDbStatements + <$> createStatement connection "INSERT INTO mods VALUES (?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO refs VALUES (?,?,?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO decls VALUES (?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO imports VALUES (?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO defs VALUES (?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO exports VALUES (?,?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO typerefs VALUES (?,?,?,?,?,?,?)" + <*> createStatement connection "INSERT INTO typenames(name,mod,unit) VALUES (?,?,?)" + <*> createStatement connection "SELECT id FROM typenames WHERE name = ? AND mod = ? AND unit = ?" + <*> prepareInternalTableDeletion + ) + action + {-| Given path to @.hiedb@ file, constructs 'HieDb' and passes it to given function. -} withHieDb :: FilePath -> (HieDb -> IO a) -> IO a -withHieDb fp f = withConnection fp (checkVersion f . HieDb) +withHieDb fp f = withHieDbImpl fp $ \hiedb -> do + checkVersion f hiedb {-| Given GHC LibDir and path to @.hiedb@ file, constructs DynFlags (required for printing info from @.hie@ files) and 'HieDb' and passes them to given function. -} withHieDbAndFlags :: LibDir -> FilePath -> (DynFlags -> HieDb -> IO a) -> IO a -withHieDbAndFlags libdir fp f = do +withHieDbAndFlags libdir fp f = withHieDbImpl fp $ \hiedb -> do dynFlags <- dynFlagsForPrinting libdir - withConnection fp (checkVersion (f dynFlags) . HieDb) + checkVersion (f dynFlags) hiedb {-| Initialize database schema for given 'HieDb'. -} initConn :: HieDb -> IO () -initConn (getConn -> conn) = do +{-# DEPRECATED initConn "Use setupHieDb instead." #-} +initConn = setupHieDb . getConn + +{-| Initialize database schema for given 'HieDb'. +-} +setupHieDb :: Connection -> IO () +setupHieDb conn = do execute_ conn "PRAGMA busy_timeout = 500;" execute_ conn "PRAGMA journal_mode = WAL;" execute_ conn "PRAGMA foreign_keys = ON;" + + -- The default setting for synchronous is 'FULL'. `FULL` will issue fsync's + -- and wait to ensure that every write is done durably. Switching to `NORMAL` + -- moves this to the checkpoints by the WAL (when using a WAL), losing us + -- durability. Practically this means that committed transaction may be rolled + -- back on system failure. This is fine, as the hiedb file will recover by + -- reindexing. See the following. + -- - https://sqlite.org/pragma.html#pragma_synchronous + -- - https://sqlite.org/wal.html + -- - https://github.com/wz1000/HieDb/pull/86 for benchmarks. + execute_ conn "PRAGMA synchronous = NORMAL;" execute_ conn "PRAGMA defer_foreign_keys = ON;" execute_ conn "CREATE TABLE IF NOT EXISTS mods \ @@ -188,7 +250,7 @@ Returns an Array mapping 'TypeIndex' to database ID assigned to the corresponding record in DB. -} addArr :: HieDb -> A.Array TypeIndex HieTypeFlat -> IO (A.Array TypeIndex (Maybe Int64)) -addArr (getConn -> conn) arr = do +addArr hiedb arr = do forM arr $ \case HTyVarTy n -> addName n HTyConApp tc _ -> addName (ifaceTyConName tc) @@ -201,8 +263,8 @@ addArr (getConn -> conn) arr = do let occ = nameOccName n mod = moduleName m uid = moduleUnit m - execute conn "INSERT INTO typenames(name,mod,unit) VALUES (?,?,?)" (occ,mod,uid) - fmap fromOnly . listToMaybe <$> query conn "SELECT id FROM typenames WHERE name = ? AND mod = ? AND unit = ?" (occ,mod,uid) + runStatementFor_ (insertTypenamesStatement (preparedStatements hiedb)) (occ,mod,uid) + fmap fromOnly <$> runStatementFor (queryTypenamesStatement (preparedStatements hiedb)) (occ,mod,uid) {-| Add references to types from given @.hie@ file to DB. -} addTypeRefs @@ -308,7 +370,7 @@ addRefsFromLoadedInternal addRefsFromLoadedInternal db@(getConn -> conn) path sourceFile hash skipOptions hf = liftIO $ withTransaction conn $ do - deleteInternalTables conn path + deleteInternalTablesStatement (preparedStatements db) (Only path) addRefsFromLoaded_unsafe db path sourceFile hash skipOptions hf -- | Like 'addRefsFromLoaded' but without: @@ -317,8 +379,7 @@ addRefsFromLoadedInternal -- -- Mostly useful to index a new database from scratch as fast as possible addRefsFromLoaded_unsafe - :: MonadIO m - => HieDb -- ^ HieDb into which we're adding the file + :: HieDb -- ^ HieDb into which we're adding the file -> FilePath -- ^ Path to @.hie@ file -> SourceFile -- ^ Path to .hs file from which @.hie@ file was created -- Also tells us if this is a real source file? @@ -326,9 +387,9 @@ addRefsFromLoaded_unsafe -> Fingerprint -- ^ The hash of the @.hie@ file -> SkipOptions -- ^ Skip indexing certain tables -> HieFile -- ^ Data loaded from the @.hie@ file - -> m () + -> IO () addRefsFromLoaded_unsafe - db@(getConn -> conn) path sourceFile hash skipOptions hf = liftIO $ do + db path sourceFile hash skipOptions hf = do let isBoot = "boot" `isSuffixOf` path mod = moduleName smod @@ -352,25 +413,25 @@ addRefsFromLoaded_unsafe let sourceOnlyNodeInfo = SourcedNodeInfo $ M.delete originToDrop sniMap in Node sourceOnlyNodeInfo sp (map (dropNodeInfos originToDrop) children) - execute conn "INSERT INTO mods VALUES (?,?,?,?,?,?,?)" modrow + runStatementFor_ (insertModsStatement (preparedStatements db)) modrow let AstInfo refsSrc declsSrc importsSrc = genAstInfo path smod SourceInfo refmapSourceOnly AstInfo refsGen declsGen importsGen = genAstInfo path smod GeneratedInfo refmapGeneratedOnly unless (skipRefs skipOptions) $ - executeMany conn "INSERT INTO refs VALUES (?,?,?,?,?,?,?,?,?)" (refsSrc <> refsGen) + mapM_ (runStatementFor_ (insertRefsStatement (preparedStatements db))) (refsSrc <> refsGen) unless (skipDecls skipOptions) $ - executeMany conn "INSERT INTO decls VALUES (?,?,?,?,?,?,?)" (declsSrc <> declsGen) + mapM_ (runStatementFor_ (insertDeclsStatement (preparedStatements db))) (declsSrc <> declsGen) unless (skipImports skipOptions) $ - executeMany conn "INSERT INTO imports VALUES (?,?,?,?,?,?)" (importsSrc <> importsGen) + mapM_ (runStatementFor_ (insertImportsStatement (preparedStatements db))) (importsSrc <> importsGen) let defs = genDefRow path smod refmapAll unless (skipDefs skipOptions) $ - executeMany conn "INSERT INTO defs VALUES (?,?,?,?,?,?)" defs + mapM_ (runStatementFor_ (insertDefsStatement (preparedStatements db))) defs let exports = generateExports path $ hie_exports hf unless (skipExports skipOptions) $ - executeMany conn "INSERT INTO exports VALUES (?,?,?,?,?,?,?,?)" exports + mapM_ (runStatementFor_ (insertExportsStatement (preparedStatements db))) exports unless (skipTypes skipOptions) $ do ixs <- addArr db (hie_types hf) @@ -400,21 +461,21 @@ removeDependencySrcFiles (getConn -> conn) = {-| Delete all occurrences of given @.hie@ file from the database -} deleteFileFromIndex :: HieDb -> FilePath -> IO () -deleteFileFromIndex (getConn -> conn) path = withTransaction conn $ do - deleteInternalTables conn path +deleteFileFromIndex db@(getConn -> conn) path = withTransaction conn $ do + deleteInternalTablesStatement (preparedStatements db) (Only path) {-| Delete all entries associated with modules for which the 'modInfoSrcFile' doesn't exist on the disk. Doesn't delete it if there is no associated 'modInfoSrcFile' -} deleteMissingRealFiles :: HieDb -> IO () -deleteMissingRealFiles (getConn -> conn) = withTransaction conn $ do +deleteMissingRealFiles db@(getConn -> conn) = withTransaction conn $ do missing_file_keys <- fold_ conn "SELECT hieFile,hs_src FROM mods WHERE hs_src IS NOT NULL AND is_real" [] $ \acc (path,src) -> do exists <- doesFileExist src pure $ if exists then acc else path : acc forM_ missing_file_keys $ \path -> do - deleteInternalTables conn path + deleteInternalTablesStatement (preparedStatements db) (Only path) {-| Garbage collect typenames with no references - it is a good idea to call this function after a sequence of database updates (inserts or deletes) @@ -422,6 +483,9 @@ this function after a sequence of database updates (inserts or deletes) garbageCollectTypeNames :: HieDb -> IO Int garbageCollectTypeNames (getConn -> conn) = do execute_ conn "DELETE FROM typenames WHERE NOT EXISTS ( SELECT 1 FROM typerefs WHERE typerefs.id = typenames.id LIMIT 1 )" + -- Tack on a call to optimize that'll vacuum and update any table statistics + -- that might have changed. See https://sqlite.org/pragma.html#pragma_optimize. + execute_ conn "PRAGMA optimize;" changes conn deleteInternalTables :: Connection -> FilePath -> IO () diff --git a/src/HieDb/Run.hs b/src/HieDb/Run.hs index 61eb87d..2f4eb7f 100644 --- a/src/HieDb/Run.hs +++ b/src/HieDb/Run.hs @@ -277,7 +277,6 @@ runCommand libdir opts cmd = withHieDbAndFlags libdir (database opts) $ \dynFlag when (trace opts) $ setHieTrace conn (Just $ T.hPutStrLn stderr . ("\n****TRACE: "<>)) when (reindex opts) $ do - initConn conn files' <- map hieModuleHieFile <$> getAllIndexedMods conn files <- fmap catMaybes $ forM files' $ \f -> do exists <- doesFileExist f @@ -293,9 +292,8 @@ runCommand libdir opts cmd = withHieDbAndFlags libdir (database opts) $ \dynFlag hPutStrLn stderr $ "Re-indexing " ++ show n ++ " files, deleting " ++ show (n-orig) ++ " files" doIndex conn opts stderr files case cmd of - Init -> initConn conn + Init -> pure () Index dirs -> do - initConn conn files <- concat <$> mapM getHieFilesIn dirs doIndex conn opts stderr files TypeRefs typ mn muid -> do diff --git a/src/HieDb/Types.hs b/src/HieDb/Types.hs index 529ed5a..c9f39a0 100644 --- a/src/HieDb/Types.hs +++ b/src/HieDb/Types.hs @@ -7,6 +7,7 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -Wno-orphans #-} +{-# LANGUAGE TypeApplications #-} module HieDb.Types where import Prelude hiding (mod) @@ -16,9 +17,11 @@ import Data.IORef import qualified Data.Text as T import qualified Data.Text.Encoding as T +import Control.Exception +import Control.Monad (void) +import Control.Monad.Cont (ContT(..)) import Control.Monad.IO.Class import Control.Monad.Reader -import Control.Exception import Data.List.NonEmpty (NonEmpty(..)) @@ -32,7 +35,81 @@ import qualified Text.ParserCombinators.ReadP as R import HieDb.Compat -newtype HieDb = HieDb { getConn :: Connection } +data HieDb = HieDb + { getConn :: !Connection + , preparedStatements :: HieDbStatements + } + +-- | Record of prepared statements that are relevant during the slowest +-- operation, indexing. +-- +-- See benchmarks in https://github.com/wz1000/HieDb/pull/86, where statement +-- preparation results in a ~28% improvement in indexing hie files generated +-- from HLS. +data HieDbStatements = HieDbStatements + { insertModsStatement :: !(StatementFor HieModuleRow) + , insertRefsStatement :: !(StatementFor RefRow) + , insertDeclsStatement :: !(StatementFor DeclRow) + , insertImportsStatement :: !(StatementFor ImportRow) + , insertDefsStatement :: !(StatementFor DefRow) + , insertExportsStatement :: !(StatementFor ExportRow) + , insertTyperefsStatement :: !(StatementFor TypeRef) + , insertTypenamesStatement :: !(StatementFor (OccName, ModuleName, Unit)) + , queryTypenamesStatement :: !(StatementFor (OccName, ModuleName, Unit)) + , deleteInternalTablesStatement :: !(Only FilePath -> IO ()) + } + +-- | A type-safe wrapper connecting the query preparation with the datatype the +-- query is intended for. The type variable 'a' should correspond to the +-- datatype that will be bound to the statement that is wrapped. +newtype StatementFor a = StatementFor Statement + +-- | Equivalent to unit so we can avoid parsing anything when doing insertions. +-- +-- Many of our indexing operations are just simple inserts that don't return +-- results. We can't use the array of @sqlite-simple.execute@ functions as they +-- can't make use of the statements we've prepared. +data NoOutput = NoOutput + +instance FromRow NoOutput where + fromRow = pure NoOutput + +-- | Helper for preparing multiple statements, each of which has to be +-- bracket-wrapped. Done via ContT, so we can use do/applicative notation. +createStatement :: Connection -> Query -> ContT r IO (StatementFor a) +createStatement connection query = fmap StatementFor (ContT (withStatement connection query)) + +-- | Run a statement that was built for an datatype in mind, ensuring that we do +-- indeed pass that datatype. +-- +-- This function is preferably inlined so it'd get specialized with the datatype +-- encoder. +runStatementFor_ :: ToRow a => StatementFor a -> a -> IO () +{-# INLINE runStatementFor_ #-} +runStatementFor_ (StatementFor statement) params = do + withBind statement params $ + -- sqlite-simple doesn't offer the best interface for executing prepared + -- insertions. `nextRow` requires a hint to know what it needs to parse if it + -- encounters content from sqlite. Though this code path isn't hit as our + -- insertions don't return anything, we still need to provide a parsable + -- datatype. Bit of a leaky abstraction, can be cleaned up by using the + -- lower-level `direct-sqlite` instead. + void (nextRow @NoOutput statement) + +-- | Run a statement that was built for an datatype in mind, ensuring that we do +-- indeed pass that datatype. +-- +-- This function is preferably inlined so it'd get specialized with the datatype +-- encoder. +-- +-- NB: While the input variable acts as a witness and is carried around, the +-- output variable is unconstrained. When using this function, double check that +-- the output type is what you expect it to be. +runStatementFor :: (ToRow a, FromRow b) => StatementFor a -> a -> IO (Maybe b) +{-# INLINE runStatementFor #-} +runStatementFor (StatementFor statement) params = do + withBind statement params $ + nextRow statement data HieDbException = IncompatibleSchemaVersion diff --git a/src/HieDb/Utils.hs b/src/HieDb/Utils.hs index b6d9865..d5661c3 100644 --- a/src/HieDb/Utils.hs +++ b/src/HieDb/Utils.hs @@ -35,7 +35,6 @@ import Data.IORef import HieDb.Types import HieDb.Compat -import Database.SQLite.Simple import Control.Monad.State.Strict (StateT, get, put) import qualified Data.IntSet as ISet import qualified Data.IntMap.Strict as IMap @@ -60,7 +59,7 @@ import Control.Concurrent.MVar (readMVar) type TypeIndexing a = StateT (IntMap IntSet) IO a addTypeRef :: HieDb -> FilePath -> A.Array TypeIndex HieTypeFlat -> A.Array TypeIndex (Maybe Int64) -> RealSrcSpan -> TypeIndex -> TypeIndexing () -addTypeRef (getConn -> conn) hf arr ixs sp = go 0 +addTypeRef hiedb hf arr ixs sp = go 0 where sl = srcSpanStartLine sp sc = srcSpanStartCol sp @@ -75,7 +74,7 @@ addTypeRef (getConn -> conn) hf arr ixs sp = go 0 indexed <- get let isTypeIndexed = ISet.member (fromIntegral occ) (IMap.findWithDefault ISet.empty depth indexed) unless isTypeIndexed $ do - liftIO $ execute conn "INSERT INTO typerefs VALUES (?,?,?,?,?,?,?)" ref + liftIO $ runStatementFor_ (insertTyperefsStatement (preparedStatements hiedb)) ref put $ IMap.alter (\case Nothing -> Just $ ISet.singleton (fromIntegral occ) Just s -> Just $ ISet.insert (fromIntegral occ) s