diff --git a/crates/mq-check/src/builtin.rs b/crates/mq-check/src/builtin.rs index 4f1dc5122..aaad09da8 100644 --- a/crates/mq-check/src/builtin.rs +++ b/crates/mq-check/src/builtin.rs @@ -263,7 +263,7 @@ fn register_math(ctx: &mut InferenceContext) { // Unary math: number -> number register_many( ctx, - &["abs", "ceil", "floor", "round", "trunc"], + &["abs", "ceil", "floor", "round", "trunc", "ln", "log10", "sqrt", "exp"], vec![Type::Number], Type::Number, ); @@ -338,6 +338,7 @@ fn register_string(ctx: &mut InferenceContext) { Type::array(Type::String), ); register_binary(ctx, "is_regex_match", Type::String, Type::String, Type::Bool); + register_binary(ctx, "is_not_regex_match", Type::String, Type::String, Type::Bool); // Encoding functions register_many( @@ -358,6 +359,12 @@ fn register_string(ctx: &mut InferenceContext) { Type::dict(Type::Var(k), Type::Var(v)), ); + // Scan: (string, pattern) -> [a] + // Element type is left polymorphic since it depends on whether the pattern has + // capture groups (array of groups per match) or not (string per match). 
+ let a = ctx.fresh_var(); + register_binary(ctx, "scan", Type::String, Type::String, Type::array(Type::Var(a))); + // None propagation for string functions register_none_propagation_unary( ctx, @@ -703,6 +710,10 @@ fn register_type_conversion(ctx: &mut InferenceContext) { // bytes -> [number] register_unary(ctx, "to_array", Type::Bytes, Type::array(Type::Number)); + // to_boolean: bool -> bool, string -> bool (parses "true"/"false") + register_unary(ctx, "to_boolean", Type::Bool, Type::Bool); + register_unary(ctx, "to_boolean", Type::String, Type::Bool); + // to_bytes: string -> bytes, [number] -> bytes, bytes -> bytes register_unary(ctx, "to_bytes", Type::String, Type::Bytes); register_unary(ctx, "to_bytes", Type::array(Type::Number), Type::Bytes); @@ -880,6 +891,36 @@ fn register_datetime(ctx: &mut InferenceContext) { register_nullary(ctx, "now", Type::Number); register_unary(ctx, "from_date", Type::String, Type::Number); register_binary(ctx, "to_date", Type::Number, Type::String, Type::String); + + // gmtime/localtime: number (unix timestamp) -> [number] (broken-down time array) + register_unary(ctx, "gmtime", Type::Number, Type::array(Type::Number)); + register_unary(ctx, "localtime", Type::Number, Type::array(Type::Number)); + + // mktime: [number] (broken-down time array) -> number (unix timestamp) + register_unary(ctx, "mktime", Type::array(Type::Number), Type::Number); + + // strftime: (number, string) -> string + register_binary(ctx, "strftime", Type::Number, Type::String, Type::String); + + // date_add: ([number], number, string) -> [number] + register_ternary( + ctx, + "date_add", + Type::array(Type::Number), + Type::Number, + Type::String, + Type::array(Type::Number), + ); + + // date_diff: ([number], [number], string) -> number + register_ternary( + ctx, + "date_diff", + Type::array(Type::Number), + Type::array(Type::Number), + Type::String, + Type::Number, + ); } /// I/O and control flow functions: print, stderr, error, halt, input @@ -962,6 +1003,8 
@@ fn register_markdown(ctx: &mut InferenceContext) { "is_math_inline", "is_toml", "is_yaml", + "is_callout", + "is_table_align", ] { register_unary(ctx, name, Type::Markdown, Type::Bool); } @@ -1005,6 +1048,8 @@ fn register_markdown(ctx: &mut InferenceContext) { "is_math_inline", "is_toml", "is_yaml", + "is_callout", + "is_table_align", ] { let a = ctx.fresh_var(); register_unary(ctx, name, Type::Var(a), Type::Bool); @@ -1038,6 +1083,8 @@ fn register_markdown(ctx: &mut InferenceContext) { "to_code_inline", "to_strong", "to_em", + "to_blockquote", + "to_delete", "increase_header_level", "decrease_header_level", "to_math", @@ -1048,6 +1095,25 @@ fn register_markdown(ctx: &mut InferenceContext) { Type::Markdown, ); + // to_callout: (markdown, string, string) -> markdown + register_ternary( + ctx, + "to_callout", + Type::Markdown, + Type::String, + Type::String, + Type::Markdown, + ); + + // to_md_fragment: markdown -> markdown, [a] -> markdown + register_unary(ctx, "to_md_fragment", Type::Markdown, Type::Markdown); + let a = ctx.fresh_var(); + register_unary(ctx, "to_md_fragment", Type::array(Type::Var(a)), Type::Markdown); + + // to_md_table_align: [a] -> markdown + let a = ctx.fresh_var(); + register_unary(ctx, "to_md_table_align", Type::array(Type::Var(a)), Type::Markdown); + // (markdown, number) -> markdown let a = ctx.fresh_var(); register_binary(ctx, "to_h", Type::Var(a), Type::Number, Type::Markdown); @@ -1150,6 +1216,26 @@ fn register_debug(ctx: &mut InferenceContext) { /// File I/O functions fn register_file_io(ctx: &mut InferenceContext) { register_unary(ctx, "read_file", Type::String, Type::String); + register_unary(ctx, "read_file_bytes", Type::String, Type::Bytes); + register_unary(ctx, "file_exists", Type::String, Type::Bool); + + // collection: string (dir path) -> [{path, title, frontmatter, content}] + let (k, v) = (ctx.fresh_var(), ctx.fresh_var()); + register_unary( + ctx, + "collection", + Type::String, + Type::array(Type::dict(Type::Var(k), 
Type::Var(v))), + ); + + // Path manipulation: string -> string + register_many( + ctx, + &["basename", "dirname", "extname", "stem"], + vec![Type::String], + Type::String, + ); + register_binary(ctx, "path_join", Type::String, Type::String, Type::String); } fn register_bytes(ctx: &mut InferenceContext) { @@ -1373,6 +1459,12 @@ mod tests { #[case::nan("nan()", true)] #[case::infinite("infinite()", true)] #[case::is_nan("is_nan(1.0)", true)] + #[case::ln("ln(2.0)", true)] + #[case::log10("log10(100)", true)] + #[case::sqrt("sqrt(4)", true)] + #[case::exp("exp(1)", true)] + #[case::ln_string("ln(\"x\")", false)] // Should fail: wrong type + #[case::sqrt_string("sqrt(\"x\")", false)] // Should fail: wrong type fn test_special_number_functions(#[case] code: &str, #[case] should_succeed: bool) { let result = check_types(code); assert_eq!( @@ -1396,6 +1488,10 @@ mod tests { #[case::rindex("rindex(\"hello world hello\", \"hello\")", true)] #[case::capture("capture(\"hello 42\", \"(?P\\\\w+)\")", true)] #[case::is_regex_match("is_regex_match(\"hello123\", \"[0-9]+\")", true)] + #[case::is_not_regex_match("is_not_regex_match(\"hello123\", \"[0-9]+\")", true)] + #[case::is_not_regex_match_number("is_not_regex_match(42, \"[0-9]+\")", false)] // Should fail: wrong type + #[case::scan("scan(\"2024-06\", \"(\\\\d{4})-(\\\\d{2})\")", true)] + #[case::scan_number("scan(42, \"[0-9]+\")", false)] // Should fail: wrong type #[case::base64url("base64url(\"hello\")", true)] #[case::base64urld("base64urld(\"aGVsbG8=\")", true)] #[case::ltrim_number("ltrim(42)", false)] @@ -1545,6 +1641,8 @@ mod tests { #[case::keys("keys({\"a\": 1, \"b\": 2})", true)] #[case::values("values({\"a\": 1, \"b\": 2})", true)] #[case::entries("entries({\"a\": 1, \"b\": 2})", true)] + #[case::has_dict("has({\"a\": 1}, \"a\")", true)] + #[case::has_array("has([1, 2, 3], 1)", true)] fn test_dict_query_functions(#[case] code: &str, #[case] should_succeed: bool) { let result = check_types(code); assert_eq!( @@ 
-1578,6 +1676,8 @@ mod tests { #[case::set("set({\"a\": 1}, \"b\", 2)", true)] #[case::del("del({\"a\": 1, \"b\": 2}, \"a\")", true)] #[case::update("update({\"a\": 1}, {\"b\": 2})", true)] + #[case::from_entries("from_entries([[\"a\", 1], [\"b\", 2]])", true)] + #[case::with_entries("with_entries({\"a\": 1}, fn(pair): [pair[0], pair[1] + 1];)", true)] fn test_dict_manipulation_functions(#[case] code: &str, #[case] should_succeed: bool) { let result = check_types(code); assert_eq!( @@ -1596,6 +1696,9 @@ mod tests { #[case::to_string("to_string(42)", true)] #[case::to_array("to_array(42)", true)] #[case::type_of("type(42)", true)] + #[case::to_boolean_string("to_boolean(\"true\")", true)] + #[case::to_boolean_bool("to_boolean(true)", true)] + #[case::to_boolean_number("to_boolean(42)", false)] // Should fail: wrong type fn test_type_conversion_functions(#[case] code: &str, #[case] should_succeed: bool) { let result = check_types(code); assert_eq!( @@ -1613,6 +1716,18 @@ mod tests { #[case::now("now()", true)] #[case::from_date("from_date(\"2024-01-01\")", true)] #[case::to_date("to_date(1704067200000, \"%Y-%m-%d\")", true)] + #[case::gmtime("gmtime(0)", true)] + #[case::localtime("localtime(0)", true)] + #[case::mktime("mktime([2024, 0, 1, 0, 0, 0, 1, 0])", true)] + #[case::strftime("strftime(0, \"%Y-%m-%d\")", true)] + #[case::date_add("date_add([2024, 0, 1, 0, 0, 0, 1, 0], 1, \"days\")", true)] + #[case::date_diff( + "date_diff([2024, 0, 1, 0, 0, 0, 1, 0], [2024, 0, 2, 0, 0, 0, 2, 1], \"days\")", + true + )] + #[case::gmtime_string("gmtime(\"x\")", false)] // Should fail: wrong type + #[case::mktime_string("mktime(\"x\")", false)] // Should fail: wrong type + #[case::strftime_swapped("strftime(\"x\", 1)", false)] // Should fail: wrong type fn test_datetime_functions(#[case] code: &str, #[case] should_succeed: bool) { let result = check_types(code); assert_eq!( @@ -1641,6 +1756,35 @@ mod tests { ); } + // File I/O And Path Functions + + #[rstest] + 
#[case::read_file("read_file(\"a.md\")", true)] + #[case::read_file_bytes("read_file_bytes(\"a.md\")", true)] + #[case::file_exists("file_exists(\"a.md\")", true)] + #[case::collection("collection(\"docs\")", true)] + #[case::collection_len("len(collection(\"docs\"))", true)] + #[case::basename("basename(\"a/b.md\")", true)] + #[case::dirname("dirname(\"a/b.md\")", true)] + #[case::extname("extname(\"a/b.md\")", true)] + #[case::stem("stem(\"a/b.md\")", true)] + #[case::path_join("path_join(\"a\", \"b.md\")", true)] + #[case::read_file_number("read_file(42)", false)] // Should fail: wrong type + #[case::file_exists_number("file_exists(42)", false)] // Should fail: wrong type + #[case::collection_number("collection(42)", false)] // Should fail: wrong type + #[case::basename_number("basename(42)", false)] // Should fail: wrong type + #[case::path_join_number("path_join(42, \"b\")", false)] // Should fail: wrong type + fn test_file_io_functions(#[case] code: &str, #[case] should_succeed: bool) { + let result = check_types(code); + assert_eq!( + result.is_empty(), + should_succeed, + "Code: {}\nResult: {:?}", + code, + result + ); + } + // Complex Expressions With Builtins #[rstest] @@ -1942,6 +2086,33 @@ mod tests { ); } + // Type filter functions (`arrays`, `booleans`, etc.) are `def`-based builtins from + // builtin.mq, not registered in this file: hir.add_builtin() loads builtin.mq's + // source into the HIR, so their types are inferred from the function bodies just + // like any other user-defined function. 
+ #[rstest] + #[case::arrays("arrays([1, 2])", true)] + #[case::markdowns("markdowns(to_hr())", true)] + #[case::booleans("booleans(true)", true)] + #[case::numbers("numbers(42)", true)] + #[case::strings("strings(\"a\")", true)] + #[case::dicts("dicts({\"a\": 1})", true)] + #[case::nones("nones(None)", true)] + #[case::bytes_filter("bytes(to_bytes(\"a\"))", true)] + #[case::iterables_array("iterables([1, 2])", true)] + #[case::iterables_dict("iterables({\"a\": 1})", true)] + #[case::scalars("scalars(1)", true)] + fn test_type_filter_functions(#[case] code: &str, #[case] should_succeed: bool) { + let result = check_types(code); + assert_eq!( + result.is_empty(), + should_succeed, + "Code: {}\nResult: {:?}", + code, + result + ); + } + // Utility Functions #[rstest] @@ -2022,6 +2193,8 @@ mod tests { #[case::is_yaml("to_markdown(\"hello\") | first() | is_yaml()", true)] #[case::is_h_level("to_markdown(\"# hello\") | first() | is_h_level(1)", true)] #[case::is_h_level_wrong_type("is_h_level(42, \"str\")", false)] + #[case::is_callout("to_markdown(\"hello\") | first() | is_callout()", true)] + #[case::is_table_align("to_markdown(\"hello\") | first() | is_table_align()", true)] fn test_markdown_type_check_functions(#[case] code: &str, #[case] should_succeed: bool) { let result = check_types(code); assert_eq!( @@ -2057,6 +2230,12 @@ mod tests { #[case::to_mdx("to_mdx(\"hello\")", true)] #[case::to_strong("to_markdown(\"hello\") | first() | to_strong()", true)] #[case::to_em("to_markdown(\"hello\") | first() | to_em()", true)] + #[case::to_blockquote("to_markdown(\"hello\") | first() | to_blockquote()", true)] + #[case::to_delete("to_markdown(\"hello\") | first() | to_delete()", true)] + #[case::to_callout("to_markdown(\"hello\") | first() | to_callout(\"note\", \"Note\")", true)] + #[case::to_md_fragment_markdown("to_markdown(\"hello\") | first() | to_md_fragment()", true)] + #[case::to_md_fragment_array("to_md_fragment([\"a\", \"b\"])", true)] + 
#[case::to_md_table_align("to_md_table_align([\"left\", \"right\"])", true)] fn test_markdown_conversion_functions(#[case] code: &str, #[case] should_succeed: bool) { let result = check_types(code); assert_eq!( @@ -2193,6 +2372,17 @@ mod tests { )] #[case::to_markdown_valid("to_markdown(\"# hello\")", true, "to_markdown with string is valid")] #[case::to_h_valid("to_markdown(\"hello\") | to_h(1)", true, "to_h with number is valid")] + #[case::to_callout_wrong_kind_type( + "to_markdown(\"hello\") | first() | to_callout(42, \"Note\")", + false, + "to_callout expects string kind" + )] + #[case::to_callout_wrong_title_type( + "to_markdown(\"hello\") | first() | to_callout(\"note\", 42)", + false, + "to_callout expects string title" + )] + #[case::to_md_table_align_wrong_type("to_md_table_align(\"left\")", false, "to_md_table_align expects an array")] fn test_markdown_type_errors(#[case] code: &str, #[case] should_succeed: bool, #[case] description: &str) { let result = check_types(code); assert_eq!( diff --git a/crates/mq-lang/builtin.mq b/crates/mq-lang/builtin.mq index 9d5c01d1e..255b8ab79 100644 --- a/crates/mq-lang/builtin.mq +++ b/crates/mq-lang/builtin.mq @@ -77,6 +77,24 @@ def booleans(b): select(b, is_bool(b)); # Returns number if input is number, None otherwise def numbers(n): select(n, is_number(n)); +# Returns string if input is string, None otherwise +def strings(s): select(s, is_string(s)); + +# Returns dict if input is dict, None otherwise +def dicts(d): select(d, is_dict(d)); + +# Returns the value if it is None, None otherwise +def nones(n): select(n, is_none(n)); + +# Returns bytes if input is bytes, None otherwise +def bytes(b): select(b, is_bytes(b)); + +# Returns the value if it is an array or dict (i.e. a container that can be iterated over), None otherwise +def iterables(v): select(v, is_array(v) || is_dict(v)); + +# Returns the value if it is not an array or dict (i.e. 
a leaf/scalar value), None otherwise +def scalars(v): select(v, !(is_array(v) || is_dict(v))); + # Formats a date to ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ) def to_date_iso8601(d): to_date(d, "%Y-%m-%dT%H:%M:%SZ"); @@ -877,6 +895,25 @@ def omit(d, keys): end end +# Checks if a dict has the given key, or an array has an element at the given index. +def has(v, key): + if (is_dict(v)): + in(keys(v), key) + elif (is_array(v)): + is_number(key) && between(key, 0, len(v) - 1) + else: + false +end + +# Builds a dict from an array of [key, value] pairs, as produced by `entries`. +# If the same key appears more than once, the last occurrence wins. +def from_entries(arr): + fold(arr, dict(), fn(acc, pair): set(acc, to_string(pair[0]), pair[1]);); + +# Transforms each [key, value] pair of a dict by applying the given function, +# then rebuilds a dict from the resulting pairs. +def with_entries(d, f): from_entries(map(entries(d), f)); + # Parses frontmatter from a markdown node, supporting both YAML and TOML formats. 
def frontmatter(v): if (is_yaml(v)): diff --git a/crates/mq-lang/builtin_tests.mq b/crates/mq-lang/builtin_tests.mq index a76b0f16d..c85c31359 100644 --- a/crates/mq-lang/builtin_tests.mq +++ b/crates/mq-lang/builtin_tests.mq @@ -194,6 +194,63 @@ def test_numbers(): | assert_eq(result2, None) end +def test_strings(): + let result1 = strings("hello") + | assert_eq(result1, "hello") + + | let result2 = strings(42) + | assert_eq(result2, None) +end + +def test_dicts(): + let result1 = dicts({"a": 1}) + | assert_eq(result1, {"a": 1}) + + | let result2 = dicts([1, 2]) + | assert_eq(result2, None) +end + +def test_nones(): + let result1 = nones(None) + | assert_eq(result1, None) + + | let result2 = nones(42) + | assert_eq(result2, None) +end + +def test_bytes(): + let result1 = bytes(b"data") + | assert_eq(result1, b"data") + + | let result2 = bytes("not bytes") + | assert_eq(result2, None) +end + +def test_iterables(): + let result1 = iterables([1, 2, 3]) + | assert_eq(result1, [1, 2, 3]) + + | let result2 = iterables({"a": 1}) + | assert_eq(result2, {"a": 1}) + + | let result3 = iterables(42) + | assert_eq(result3, None) +end + +def test_scalars(): + let result1 = scalars(42) + | assert_eq(result1, 42) + + | let result2 = scalars("hello") + | assert_eq(result2, "hello") + + | let result3 = scalars([1, 2, 3]) + | assert_eq(result3, None) + + | let result4 = scalars({"a": 1}) + | assert_eq(result4, None) +end + # Array manipulation tests def test_map(): let result1 = map([1, 2, 3], fn(x): x * 2;) @@ -831,6 +888,46 @@ def test_omit(): | assert_eq(result3, {"key": "value"}) end +def test_has(): + let result1 = has({"a": 1, "b": 2}, "a") + | assert_eq(result1, true) + + | let result2 = has({"a": 1, "b": 2}, "c") + | assert_eq(result2, false) + + | let result3 = has([1, 2, 3], 1) + | assert_eq(result3, true) + + | let result4 = has([1, 2, 3], 5) + | assert_eq(result4, false) + + | let result5 = has([1, 2, 3], -1) + | assert_eq(result5, false) + + | let result6 = has("not a 
container", "key") + | assert_eq(result6, false) +end + +def test_from_entries(): + let result1 = from_entries([["a", 1], ["b", 2]]) + | assert_eq(result1, {"a": 1, "b": 2}) + + | let result2 = from_entries([]) + | assert_eq(result2, dict()) + + # Later occurrences of the same key overwrite earlier ones + | let result3 = from_entries([["a", 1], ["a", 2]]) + | assert_eq(result3, {"a": 2}) +end + +def test_with_entries(): + let result1 = with_entries({"a": 1, "b": 2}, fn(pair): [pair[0], pair[1] * 10];) + | assert_eq(result1, {"a": 10, "b": 20}) + + | let result2 = with_entries({}, fn(pair): pair;) + | assert_eq(result2, dict()) +end + def test_frontmatter(): # YAML frontmatter is parsed into a dict let result1 = do "---\ntitle: Hello\nauthor: World\n---\n\n# Heading" | to_markdown() | first() | frontmatter(); diff --git a/crates/mq-lang/src/eval/builtin.rs b/crates/mq-lang/src/eval/builtin.rs index 2c6714a37..065bc6381 100644 --- a/crates/mq-lang/src/eval/builtin.rs +++ b/crates/mq-lang/src/eval/builtin.rs @@ -30,7 +30,7 @@ use std::sync::LazyLock; use thiserror::Error; use self::range::{generate_char_range, generate_multi_char_range, generate_numeric_range}; -use self::regex::{capture_re, is_match_re, match_re, replace_re, split_re}; +use self::regex::{capture_re, is_match_re, match_re, replace_re, scan_re, split_re}; use super::runtime_value::{self, RuntimeValue}; use mq_markdown; @@ -735,6 +735,11 @@ fn to_number_impl(_: &Ident, _: &RuntimeValue, mut args: Args, _: &SharedEnv) -> convert::to_number(&mut args[0]) } +#[mq_macros::mq_fn(name = "to_boolean", params = Fixed(1))] +fn to_boolean_impl(_: &Ident, _: &RuntimeValue, args: Args, _: &SharedEnv) -> Result { + convert::to_boolean(&args[0]) +} + #[mq_macros::mq_fn(name = "to_array", params = Fixed(1))] fn to_array_impl(_: &Ident, _: &RuntimeValue, mut args: Args, _: &SharedEnv) -> Result { convert::to_array(&mut args[0]) @@ -902,6 +907,23 @@ fn capture_impl(ident: &Ident, _: &RuntimeValue, mut args: Args, _: 
&SharedEnv) } } +#[mq_macros::mq_fn(name = "scan", params = Fixed(2))] +fn scan_impl(ident: &Ident, _: &RuntimeValue, mut args: Args, _: &SharedEnv) -> Result { + match args.as_mut_slice() { + [RuntimeValue::String(s), RuntimeValue::String(pattern)] => scan_re(s, pattern), + [node @ RuntimeValue::Markdown(_, _), RuntimeValue::String(pattern)] => node + .markdown_node() + .map(|md| scan_re(&md.value(), pattern)) + .unwrap_or_else(|| Ok(RuntimeValue::EMPTY_ARRAY)), + [RuntimeValue::None, RuntimeValue::String(_)] => Ok(RuntimeValue::EMPTY_ARRAY), + [a, b] => Err(Error::InvalidTypes( + ident.to_string(), + vec![std::mem::take(a), std::mem::take(b)], + )), + _ => unreachable!("scan should always receive exactly two arguments"), + } +} + #[mq_macros::mq_fn(name = "downcase", params = Fixed(1))] fn downcase_impl(_: &Ident, _: &RuntimeValue, args: Args, _: &SharedEnv) -> Result { match args.as_slice() { @@ -3880,6 +3902,7 @@ mq_macros::builtin_dispatch! { TO_MARKDOWN_STRING, TO_STRING, TO_NUMBER, + TO_BOOLEAN, TO_ARRAY, TO_BYTES, FROM_HEX, @@ -3899,6 +3922,7 @@ mq_macros::builtin_dispatch! { IS_REGEX_MATCH, IS_NOT_REGEX_MATCH, CAPTURE, + SCAN, DOWNCASE, ASCII_DOWNCASE, GSUB, @@ -4739,6 +4763,13 @@ pub static BUILTIN_FUNCTION_DOC: LazyLock params: &["value"], }, ); + map.insert( + SmolStr::new("to_boolean"), + BuiltinFunctionDoc { + description: "Converts the given value to a boolean. Booleans are returned unchanged, the strings \"true\" and \"false\" are converted to their boolean equivalent, and all other input results in an error.", + params: &["value"], + }, + ); map.insert( SmolStr::new("to_array"), BuiltinFunctionDoc { @@ -4886,6 +4917,13 @@ pub static BUILTIN_FUNCTION_DOC: LazyLock params: &["string", "pattern"], }, ); + map.insert( + SmolStr::new("scan"), + BuiltinFunctionDoc { + description: "Finds all matches of a regular expression pattern in the string. 
For each match, returns the captured groups as an array if the pattern has capture groups, otherwise returns the whole match as a string.", + params: &["string", "pattern"], + }, + ); map.insert( SmolStr::new("downcase"), BuiltinFunctionDoc { diff --git a/crates/mq-lang/src/eval/builtin/convert.rs b/crates/mq-lang/src/eval/builtin/convert.rs index e83f8c2b2..87c168b5b 100644 --- a/crates/mq-lang/src/eval/builtin/convert.rs +++ b/crates/mq-lang/src/eval/builtin/convert.rs @@ -317,6 +317,19 @@ pub(super) fn to_number(value: &mut RuntimeValue) -> Result } } +/// convert to boolean +pub(super) fn to_boolean(value: &RuntimeValue) -> Result { + match value { + b @ RuntimeValue::Boolean(_) => Ok(b.clone()), + RuntimeValue::String(s) => match s.as_str() { + "true" => Ok(RuntimeValue::Boolean(true)), + "false" => Ok(RuntimeValue::Boolean(false)), + _ => Err(Error::InvalidTypes("to_boolean".to_string(), vec![value.clone()])), + }, + _ => Err(Error::InvalidTypes("to_boolean".to_string(), vec![value.clone()])), + } +} + /// convert to array pub(super) fn to_array(value: &mut RuntimeValue) -> Result { match value { diff --git a/crates/mq-lang/src/eval/builtin/regex.rs b/crates/mq-lang/src/eval/builtin/regex.rs index ba6ab22cd..9cffe4880 100644 --- a/crates/mq-lang/src/eval/builtin/regex.rs +++ b/crates/mq-lang/src/eval/builtin/regex.rs @@ -81,6 +81,41 @@ pub(super) fn replace_re(input: &str, pattern: &str, replacement: &str) -> Resul Ok(re.replace_all(input, replacement).to_string().into()) } +fn scan_re_inner(re: &Regex, input: &str) -> RuntimeValue { + let has_groups = re.captures_len() > 1; + let matches: Vec = re + .captures_iter(input) + .map(|caps| { + if has_groups { + RuntimeValue::Array( + caps.iter() + .skip(1) + .map(|m| { + m.map(|m| RuntimeValue::String(m.as_str().to_string())) + .unwrap_or(RuntimeValue::NONE) + }) + .collect(), + ) + } else { + RuntimeValue::String(caps.get(0).map(|m| m.as_str().to_string()).unwrap_or_default()) + } + }) + .collect(); + 
RuntimeValue::Array(matches) +} + +pub(super) fn scan_re(input: &str, pattern: &str) -> Result { + if let Some(re) = REGEX_CACHE.read().unwrap().get(pattern).cloned() { + return Ok(scan_re_inner(&re, input)); + } + let re = RegexBuilder::new(pattern) + .size_limit(1 << 20) + .build() + .map_err(|_| Error::InvalidRegularExpression(pattern.to_string()))?; + REGEX_CACHE.write().unwrap().insert(pattern.to_string(), re.clone()); + Ok(scan_re_inner(&re, input)) +} + #[inline(always)] pub(super) fn split_re(input: &str, pattern: &str) -> Result { if let Some(re) = REGEX_CACHE.read().unwrap().get(pattern).cloned() { @@ -208,4 +243,42 @@ mod tests { fn test_split_re_invalid_pattern() { assert!(split_re("text", "[invalid").is_err()); } + + #[test] + fn test_scan_re_no_groups() { + let result = scan_re("a1b2c3", r"\d").unwrap(); + assert_eq!(result, strings(vec!["1", "2", "3"])); + // second call hits cache — same result expected + let result2 = scan_re("a1b2c3", r"\d").unwrap(); + assert_eq!(result, result2); + } + + #[test] + fn test_scan_re_with_groups() { + let result = scan_re("2024-06 2025-07", r"(\d{4})-(\d{2})").unwrap(); + assert_eq!( + result, + RuntimeValue::Array(vec![ + RuntimeValue::Array(vec![ + RuntimeValue::String("2024".to_string()), + RuntimeValue::String("06".to_string()), + ]), + RuntimeValue::Array(vec![ + RuntimeValue::String("2025".to_string()), + RuntimeValue::String("07".to_string()), + ]), + ]) + ); + } + + #[test] + fn test_scan_re_no_match() { + let result = scan_re("no digits here", r"\d+").unwrap(); + assert_eq!(result, RuntimeValue::Array(vec![])); + } + + #[test] + fn test_scan_re_invalid_pattern() { + assert!(scan_re("text", "[invalid").is_err()); + } } diff --git a/crates/mq-lang/tests/integration_tests.rs b/crates/mq-lang/tests/integration_tests.rs index 074913569..d8cda3583 100644 --- a/crates/mq-lang/tests/integration_tests.rs +++ b/crates/mq-lang/tests/integration_tests.rs @@ -2520,6 +2520,11 @@ fn engine() -> DefaultEngine { 
#[case::ascii_upcase_non_ascii(r##"ascii_upcase("abcà")"##, vec![RuntimeValue::None], Ok(vec![RuntimeValue::String("ABCà".to_string())].into()))] #[case::gsub_simple(r##"gsub("a1b2", "\\d", "x")"##, vec![RuntimeValue::None], Ok(vec![RuntimeValue::String("axbx".to_string())].into()))] #[case::regex_match_simple(r##"regex_match("a1b2", "\\d")"##, vec![RuntimeValue::None], Ok(vec![RuntimeValue::Array(vec![RuntimeValue::String("1".to_string()), RuntimeValue::String("2".to_string())])].into()))] +#[case::scan_no_groups(r##"scan("a1b2", "\\d")"##, vec![RuntimeValue::None], Ok(vec![RuntimeValue::Array(vec![RuntimeValue::String("1".to_string()), RuntimeValue::String("2".to_string())])].into()))] +#[case::scan_with_groups(r##"scan("2024-06 2025-07", "(\\d{4})-(\\d{2})")"##, vec![RuntimeValue::None], Ok(vec![RuntimeValue::Array(vec![ + RuntimeValue::Array(vec![RuntimeValue::String("2024".to_string()), RuntimeValue::String("06".to_string())]), + RuntimeValue::Array(vec![RuntimeValue::String("2025".to_string()), RuntimeValue::String("07".to_string())]), +])].into()))] #[case::slice_simple(r##"slice("abcdef", 1, 4)"##, vec![RuntimeValue::None], Ok(vec![RuntimeValue::String("bcd".to_string())].into()))] #[case::sort_by_impl_simple(r##"_sort_by_impl([[2, "b"], [1, "a"]])"##, vec![RuntimeValue::None], Ok(vec![RuntimeValue::Array(vec![ RuntimeValue::Array(vec![RuntimeValue::Number(1.into()), RuntimeValue::String("a".to_string())]), @@ -2843,6 +2848,10 @@ fn engine() -> DefaultEngine { #[case::url_encode_plain(r#"url_encode("abc")"#, vec![RuntimeValue::None], Ok(vec![RuntimeValue::String("abc".to_string())].into()))] // to_number conversion #[case::to_number_string(r#"to_number("42")"#, vec![RuntimeValue::None], Ok(vec![RuntimeValue::Number(42.into())].into()))] +// to_boolean conversion +#[case::to_boolean_true_string(r#"to_boolean("true")"#, vec![RuntimeValue::None], Ok(vec![RuntimeValue::Boolean(true)].into()))] +#[case::to_boolean_false_string(r#"to_boolean("false")"#, 
vec![RuntimeValue::None], Ok(vec![RuntimeValue::Boolean(false)].into()))] +#[case::to_boolean_bool(r#"to_boolean(true)"#, vec![RuntimeValue::None], Ok(vec![RuntimeValue::Boolean(true)].into()))] // to_html conversion #[case::to_html_string(r#"to_html("hello") | type"#, vec![RuntimeValue::None], Ok(vec![RuntimeValue::String("string".to_string())].into()))] // to_text conversion @@ -3424,6 +3433,10 @@ fn test_eval(mut engine: Engine, #[case] program: &str, #[case] input: Vec) { assert!(engine.eval(program, input.into_iter()).is_err()); }