diff --git a/glean.cabal.in b/glean.cabal.in
index 35c872ce55..d7e316e827 100644
--- a/glean.cabal.in
+++ b/glean.cabal.in
@@ -651,6 +651,7 @@ library db
Glean.Query.Expand
Glean.Query.Flatten
Glean.Query.Flatten.Types
+ Glean.Query.Lint
Glean.Query.Opt
Glean.Query.Prune
Glean.Query.Reorder
diff --git a/glean/db/Glean/Database/Config.hs b/glean/db/Glean/Database/Config.hs
index fece5e72d8..9dc7d4032c 100644
--- a/glean/db/Glean/Database/Config.hs
+++ b/glean/db/Glean/Database/Config.hs
@@ -204,15 +204,17 @@ data Config = Config
data DebugFlags = DebugFlags
{ tcDebug :: !Bool
, queryDebug :: !Bool
+ , queryLint :: !Bool
}
instance Default DebugFlags where
- def = DebugFlags { tcDebug = False, queryDebug = False }
+ def = DebugFlags { tcDebug = False, queryDebug = False, queryLint = False }
instance Semigroup DebugFlags where
a <> b = DebugFlags
{ tcDebug = tcDebug a || tcDebug b
, queryDebug = queryDebug a || queryDebug b
+ , queryLint = queryLint a || queryLint b
}
instance Monoid DebugFlags where
@@ -561,6 +563,7 @@ options = do
debugParser = do
tcDebug <- switch (long "debug-tc")
queryDebug <- switch (long "debug-query")
+ queryLint <- switch (long "debug-query-lint")
return DebugFlags{..}
serverConfigThriftSource = option (eitherReader ThriftSource.parse)
diff --git a/glean/db/Glean/Database/Env.hs b/glean/db/Glean/Database/Env.hs
index 84faf01ca7..1121cebafa 100644
--- a/glean/db/Glean/Database/Env.hs
+++ b/glean/db/Glean/Database/Env.hs
@@ -161,6 +161,7 @@ getDebugEnv = do
where
add "tc" = return def { tcDebug = True }
add "query" = return def { queryDebug = True }
+ add "lint" = return def { queryLint = True }
add other = do
logWarning $ "Unkonwn GLEAN_DEBUG class: " <> other
return def
diff --git a/glean/db/Glean/Query/Lint.hs b/glean/db/Glean/Query/Lint.hs
new file mode 100644
index 0000000000..9c9f3a1194
--- /dev/null
+++ b/glean/db/Glean/Query/Lint.hs
@@ -0,0 +1,290 @@
+{-
+ Copyright (c) Meta Platforms, Inc. and affiliates.
+ All rights reserved.
+
+ This source code is licensed under the BSD-style license found in the
+ LICENSE file in the root directory of this source tree.
+-}
+
+module Glean.Query.Lint (
+ lintQuery
+ ) where
+
+import Control.Monad
+import Control.Monad.Except
+import Control.Monad.State
+import Data.List (foldl')
+import Data.IntSet (IntSet)
+import qualified Data.IntSet as IntSet
+import Compat.Prettyprinter
+
+import Glean.Angle.Types
+ (Type_(..), latestAngleVersion, tempPredicateId)
+import Glean.Database.Schema.Types
+ (DbSchema, PredicateDetails(..), lookupPid, lookupPredicateId)
+import Glean.Display
+import Glean.Query.Codegen.Types
+import Glean.RTS.Term as RTS
+import Glean.RTS.Types as RTS
+import Glean.Schema.Util (tupleSchema)
+
+-- | The lint monad: tracks the set of bound variable IDs in state,
+-- and can throw a pretty-printed error.
+type L a = StateT IntSet (Except (Doc ())) a
+
+runL :: L a -> Either (Doc ()) a
+runL m = runExcept (evalStateT m IntSet.empty)
+
+lintError :: Doc () -> L a
+lintError = throwError
+
+-- | Record a variable as bound.
+bindVar :: Var -> L ()
+bindVar v = modify (IntSet.insert (varId v))
+
+-- | Check that a variable is currently bound.
+requireBound :: Var -> L ()
+requireBound v = do
+ bound <- get
+ unless (varId v `IntSet.member` bound) $
+ lintError $ "unbound variable:" <+> displayDefault v
+
+-- | Run an action in a nested scope. The bound set is saved and
+-- restored afterwards, so bindings made inside do not escape.
+scoped :: L a -> L a
+scoped m = do
+ saved <- get
+ a <- m
+ put saved
+ return a
+
+{-
+ Check the query for internal consistency. lintQuery will check
+ the following properties of the query:
+
+ - type-correctness, e.g.
+ - in A = B, the types of A and B are equivalent
+ - in FactGenerator and DerivedFactGenerator, the types of the key
+ and value pattern match those of the predicate in the schema
+ - arguments of primitives have the correct type
+ - bind-before-use: for all variables, MatchBind occurs before MatchVar
+
+-}
+lintQuery :: DbSchema -> CodegenQuery -> Either (Doc ()) ()
+lintQuery schema QueryWithInfo{..} = runL $ do
+ let CgQuery head body = qiQuery
+ lintStatements schema body
+ lintPat head
+ case qiGenerator of
+ Nothing -> checkEqType "query head" head qiReturnType
+ Just gen -> do
+ lintGenerator gen
+ lintGeneratorTypes schema head gen
+
+-- ---------------------------------------------------------------------------
+-- Bind-before-use and type checking for statement lists
+
+lintStatements :: DbSchema -> [CgStatement] -> L ()
+lintStatements schema = mapM_ (lintStatement schema)
+
+lintStatement :: DbSchema -> CgStatement -> L ()
+lintStatement schema = \case
+ CgStatement pat gen -> do
+ -- The generator is evaluated first; its patterns can bind variables
+ lintGenerator gen
+ -- Then the LHS pattern matches the generator result, and can use
+ -- variables bound by the generator as well as previously bound ones
+ lintPat pat
+ -- Type check: pattern type should match generator result type
+ lintStmtTypes schema pat gen
+
+ CgAllStatement var expr stmts -> do
+ -- The inner statements are evaluated in a nested scope
+ scoped $ do
+ lintStatements schema stmts
+ lintPat expr
+ -- The variable bound by 'all' is available to subsequent statements
+ bindVar var
+
+ CgNegation stmts ->
+ -- Negation introduces its own scope; bindings do not escape
+ scoped $ lintStatements schema stmts
+
+ CgDisjunction stmtss -> do
+ -- Each branch starts with the same bound set. Variables bound
+ -- in all branches are available afterwards.
+ bounds <- forM stmtss $ \stmts -> scoped $ do
+ lintStatements schema stmts
+ get
+ case bounds of
+ [] -> return ()
+ (b:bs) -> put (foldl' IntSet.intersection b bs)
+
+ CgConditional{..} -> do
+ outer <- get
+ lintStatements schema cond
+ -- then branch sees cond bindings
+ lintStatements schema then_
+ thenBound <- get
+ -- else branch only sees pre-condition bindings
+ put outer
+ lintStatements schema else_
+ elseBound <- get
+ -- Variables bound in both branches are available after
+ put (IntSet.intersection thenBound elseBound)
+
+-- ---------------------------------------------------------------------------
+-- Check uses and record bindings in a single depth-first left-to-right
+-- traversal, so that a MatchBind makes the variable immediately
+-- available for a later MatchVar in the same pattern.
+
+lintPat :: Pat -> L ()
+lintPat = mapM_ lintMatch
+
+lintMatch :: Match () Var -> L ()
+lintMatch = \case
+ MatchBind v -> bindVar v
+ MatchVar v -> requireBound v
+ MatchAnd a b -> do
+ lintPat a
+ lintPat b
+ MatchPrefix _ rest ->
+ lintPat rest
+ MatchArrayPrefix _ pre whole -> do
+ mapM_ lintPat pre
+ lintPat whole
+ _ -> return ()
+
+-- | Lint expression-position subterms of a generator, then bind
+-- pattern-position variables.
+lintGenerator :: Generator -> L ()
+lintGenerator = \case
+ FactGenerator _ kpat vpat _ -> do
+ lintPat kpat
+ lintPat vpat
+ TermGenerator expr ->
+ lintPat expr
+ DerivedFactGenerator _ key value -> do
+ lintPat key
+ lintPat value
+ ArrayElementGenerator _ arr ->
+ lintPat arr
+ SetElementGenerator _ set ->
+ lintPat set
+ PrimCall _ args _ ->
+ mapM_ lintPat args
+
+-- | Type-check the result generator (qiGenerator). The head expression
+-- is the fact ID that the generator will look up.
+lintGeneratorTypes :: DbSchema -> Pat -> Generator -> L ()
+lintGeneratorTypes schema head gen = case gen of
+ FactGenerator pidRef kpat vpat _ -> do
+ mdetails <- lookupPredicate schema pidRef
+ forM_ mdetails $ \details -> do
+ checkEqType
+ ("result generator key of" <+> displayDefault pidRef)
+ kpat (predicateKeyType details)
+ checkEqType
+ ("result generator value of" <+> displayDefault pidRef)
+ vpat (predicateValueType details)
+ checkEqType "result generator fact ID" head (PredicateTy () pidRef)
+ _ -> return ()
+
+-- ---------------------------------------------------------------------------
+-- Type checking
+
+-- | Check the type consistency of a statement: pattern type vs
+-- generator result type, and predicate key/value types for fact
+-- generators.
+lintStmtTypes :: DbSchema -> Pat -> Generator -> L ()
+lintStmtTypes schema pat gen = case gen of
+ FactGenerator pidRef kpat vpat _ -> do
+ mdetails <- lookupPredicate schema pidRef
+ forM_ mdetails $ \details -> do
+ checkEqType
+ ("key of" <+> displayDefault pidRef) kpat (predicateKeyType details)
+ checkEqType
+ ("value of" <+> displayDefault pidRef) vpat (predicateValueType details)
+ -- The LHS pattern matches the fact ID, which has the predicate type
+ checkEqType "fact generator result" pat (PredicateTy () pidRef)
+
+ DerivedFactGenerator pidRef key value -> do
+ mdetails <- lookupPredicate schema pidRef
+ forM_ mdetails $ \details -> do
+ checkEqType
+ ("key of" <+> displayDefault pidRef) key (predicateKeyType details)
+ checkEqType
+ ("value of" <+> displayDefault pidRef) value (predicateValueType details)
+ checkEqType "derived fact generator result" pat (PredicateTy () pidRef)
+
+ TermGenerator expr ->
+ case (patType pat, patType expr) of
+ (Just pty, Just ety) -> typesShouldMatch "term generator" pty ety
+ _ -> return ()
+
+ ArrayElementGenerator eltTy _arr ->
+ checkEqType "array element generator" pat eltTy
+
+ SetElementGenerator eltTy _set ->
+ checkEqType "set element generator" pat eltTy
+
+ PrimCall _op _args retTy ->
+ checkEqType "primcall result" pat retTy
+
+lookupPredicate :: DbSchema -> PidRef -> L (Maybe PredicateDetails)
+lookupPredicate schema (PidRef pid predId)
+ -- The temporary predicate used for derived fact generators is not
+ -- stored in the schema, so skip it.
+ | predId == tempPredicateId = return Nothing
+ | otherwise =
+ case lookupPid pid schema of
+ Nothing -> case lookupPredicateId predId schema of
+ Nothing ->
+ lintError $ "unknown predicate:" <+> displayDefault predId
+ Just details -> return (Just details)
+ Just details -> return (Just details)
+
+-- | Check that the type of a pattern matches an expected type.
+-- When the pattern's type cannot be determined, the check is skipped.
+checkEqType :: Doc () -> Pat -> Type -> L ()
+checkEqType ctx pat expected =
+ case patType pat of
+ Nothing -> return ()
+ Just actual -> typesShouldMatch ctx actual expected
+
+typesShouldMatch :: Doc () -> Type -> Type -> L ()
+typesShouldMatch ctx actual expected
+ | eqType latestAngleVersion actual expected = return ()
+ | otherwise = throwError $ vcat
+ [ "type mismatch in" <+> ctx <> ":"
+ , " expected:" <+> displayDefault expected
+ , " actual:" <+> displayDefault actual
+ ]
+
+-- ---------------------------------------------------------------------------
+-- Inferring the type of a pattern
+
+-- | Attempt to determine the type of a pattern. Returns Nothing when
+-- the type cannot be determined from the pattern structure alone
+-- (e.g. an empty array or an Alt without surrounding context).
+patType :: Pat -> Maybe Type
+patType (Byte _) = Just ByteTy
+patType (Nat _) = Just NatTy
+patType (RTS.String _) = Just StringTy
+patType (ByteArray _) = Just (ArrayTy ByteTy)
+patType (Array []) = Nothing
+patType (Array (t:_)) = ArrayTy <$> patType t
+patType (Tuple ts) = tupleSchema <$> traverse patType ts
+patType (Alt _ _) = Nothing
+patType (Ref m) = matchType m
+
+matchType :: Match () Var -> Maybe Type
+matchType (MatchWild ty) = Just ty
+matchType (MatchNever ty) = Just ty
+matchType (MatchBind v) = Just (varType v)
+matchType (MatchVar v) = Just (varType v)
+matchType (MatchFid _) = Nothing
+matchType (MatchAnd a _) = patType a
+matchType (MatchPrefix _ _) = Just StringTy
+matchType (MatchArrayPrefix ty _ _) = Just (ArrayTy ty)
+matchType (MatchExt _) = Nothing
diff --git a/glean/db/Glean/Query/UserQuery.hs b/glean/db/Glean/Query/UserQuery.hs
index 6b869bf908..dc79e29e64 100644
--- a/glean/db/Glean/Query/UserQuery.hs
+++ b/glean/db/Glean/Query/UserQuery.hs
@@ -74,6 +74,7 @@ import Glean.Query.Flatten
import Glean.Query.Opt
import Glean.Query.Reorder
import Glean.Query.Incremental (makeIncremental)
+import Glean.Query.Lint (lintQuery)
import Glean.RTS as RTS
import Glean.RTS.Bytecode.Disassemble
import qualified Glean.RTS.Bytecode.Gen.Version as Bytecode
@@ -987,6 +988,12 @@ compileAngleQuery rec ver dbSchema mode source stored debug = do
reordered <- checkBadQuery id $ runExcept $ reorder dbSchema optimised
ifDebug $ "reordered query: " <> show (displayDefault (qiQuery reordered))
+ when (queryLint debug) $
+ case lintQuery dbSchema reordered of
+ Left err -> throwIO $ Thrift.BadQuery $
+ Text.pack $ show $ "query lint:" <+> err
+ Right () -> return ()
+
final <- case mode of
NoExtraSteps -> return reordered
IncrementalDerivation getStats -> do
diff --git a/glean/hs/Glean/RTS/Types.hs b/glean/hs/Glean/RTS/Types.hs
index 2d865845a7..e08a59b2a1 100644
--- a/glean/hs/Glean/RTS/Types.hs
+++ b/glean/hs/Glean/RTS/Types.hs
@@ -143,6 +143,7 @@ eqType version a b = case (a,b) of
(NatTy, NatTy) -> True
(StringTy, StringTy) -> True
(ArrayTy a, ArrayTy b) -> eqType version a b
+ (SetTy a, SetTy b) -> eqType version a b
(RecordTy as, RecordTy bs) ->
let isTuple = all (Text.isInfixOf "tuplefield" . fieldDefName)
-- previous to version 7 records were always compared structurally
diff --git a/glean/test/lib/Glean/Database/Test.hs b/glean/test/lib/Glean/Database/Test.hs
index a2b6cdb7fa..4921169fbb 100644
--- a/glean/test/lib/Glean/Database/Test.hs
+++ b/glean/test/lib/Glean/Database/Test.hs
@@ -23,6 +23,7 @@ module Glean.Database.Test
, setLMDBNoUnpack
, enableTcDebug
, enableQueryDebug
+ , enableQueryLint
, enableRocksDBCache
, withTestEnv
, kickOffTestDB
@@ -129,6 +130,10 @@ enableQueryDebug :: Setting
enableQueryDebug cfg = cfg
{ cfgDebug = (cfgDebug cfg) { queryDebug = True } }
+enableQueryLint :: Setting
+enableQueryLint cfg = cfg
+ { cfgDebug = (cfgDebug cfg) { queryLint = True } }
+
enableRocksDBCache :: Setting
enableRocksDBCache cfg = cfg
{ cfgServerConfig = (cfgServerConfig cfg) <&> \scfg -> scfg
@@ -146,12 +151,12 @@ withTestEnv settings action =
\(cfgAPI :: NullConfigProvider) -> do
let
dbConfig = foldl' (\acc f -> f acc)
- def
+ (enableQueryLint def
{ cfgDataStore = tmpDataStore
, cfgSchemaLocation = Just schemaLocationFiles
, cfgServerConfig = ThriftSource.value def
{ ServerConfig.config_db_rocksdb_cache_mb = 0 }
- }
+ })
settings
withDatabases evb dbConfig cfgAPI action
diff --git a/glean/website/docs/running.md b/glean/website/docs/running.md
index 31844c8fe4..d84645cdb3 100644
--- a/glean/website/docs/running.md
+++ b/glean/website/docs/running.md
@@ -181,3 +181,6 @@ Enable debugging output for the Angle typechecker.
* `--debug-query`
Enable debugging output for the Angle query compiler.
+
+* `--debug-query-lint`
+Enable consistency checks on the query before running; catches internal errors in the query compiler.