Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 52 additions & 1 deletion crates/ragfs-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use ragfs::cache::{
use ragfs::core::builder::EncryptionConfig;
use ragfs::core::{
build_default_stack, register_builtin_plugins, ConfigValue, FileInfo, FileSystem,
FilesystemStats, FsContext, FsContextInner, FsOperation, GrepResult, MountableFS,
FilesystemStats, FsContext, FsContextInner, FsOperation, GlobPage, GrepResult, MountableFS,
OperationStats, PluginConfig, RagfsConfig, StatsWrappedFS, TreeEntry, WriteFlag, FS_CTX,
};

Expand Down Expand Up @@ -1498,6 +1498,57 @@ impl RAGFSBindingClient {
})
}

/// Fetch a single page of flat glob matches.
///
/// Args:
///     path: Root path the traversal starts from
///     pattern: Glob pattern, matched against paths relative to the query root
///     show_hidden: Whether hidden files are included (default: False)
///     page_size: Upper bound on the matched entries returned in this page
///         (default: None); negative values are treated as 0
///     level_limit: Maximum depth relative to the query root (default: None);
///         negative values are treated as 0
///     continuation_token: Opaque token returned with the previous page
///     ctx: Optional FsContext dict (e.g. {"account_id": ...})
///
/// Returns:
///     A dict with keys: entries (list[GlobEntry]), next_token (str | None)
#[pyo3(signature = (path, pattern, show_hidden=false, page_size=None, level_limit=None, continuation_token=None, ctx=None))]
fn glob_directory(
    &self,
    py: Python<'_>,
    path: String,
    pattern: String,
    show_hidden: bool,
    page_size: Option<i32>,
    level_limit: Option<i32>,
    continuation_token: Option<String>,
    ctx: Option<HashMap<String, String>>,
) -> PyResult<Py<PyAny>> {
    // Python callers hand over signed integers; anything below zero collapses to 0
    // before the value is widened to `usize`.
    let clamp_to_usize = |raw: i32| raw.max(0) as usize;
    let page_size = page_size.map(clamp_to_usize);
    let level_limit_usize = level_limit.map(clamp_to_usize);

    let fs_ctx = build_fs_context(ctx);
    let top = self.top.clone();

    // Run the async glob call under the request-scoped filesystem context.
    let scoped = self.run_scoped(py, fs_ctx, move || async move {
        top.glob_directory(
            &path,
            &pattern,
            show_hidden,
            page_size,
            level_limit_usize,
            continuation_token,
        )
        .await
    });
    let page: GlobPage = scoped.map_err(to_py_err)?;

    // Convert the page to Python objects by way of its JSON representation.
    Python::attach(|gil| {
        let json = match serde_json::to_value(&page) {
            Ok(json) => json,
            Err(err) => return Err(PyRuntimeError::new_err(err.to_string())),
        };
        serde_json_to_py(gil, &json)
    })
}

/// Query multi-write sync status under a file or directory path.
///
/// Args:
Expand Down
1 change: 1 addition & 0 deletions crates/ragfs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ lru = "0.12"
# Regular expressions for grep
regex = "1.10"
mime_guess = "2.0"
globset = "0.4"

# Encryption (envelope encryption: AES-256-GCM + HKDF-SHA256)
aes-gcm = "0.10"
Expand Down
24 changes: 23 additions & 1 deletion crates/ragfs/src/cache/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ use crate::core::filesystem::{
relative_match_file,
};
use crate::core::{
FileInfo, FileSystem, GrepMatch, GrepResult, MultiWriteWrappedFS, Result, TreeEntry, WriteFlag,
FileInfo, FileSystem, GlobPage, GrepMatch, GrepResult, MultiWriteWrappedFS, Result,
TreeEntry, WriteFlag,
};
use async_trait::async_trait;
use bytes::Bytes;
Expand Down Expand Up @@ -1221,6 +1222,27 @@ impl FileSystem for CachedFileSystem {
.tree_directory(path, show_hidden, node_limit, level_limit)
.await
}

/// Glob queries are handed to the wrapped backend unchanged; this method
/// adds no logic of its own on top of the backend call.
async fn glob_directory(
    &self,
    path: &str,
    pattern: &str,
    show_hidden: bool,
    page_size: Option<usize>,
    level_limit: Option<usize>,
    continuation_token: Option<String>,
) -> Result<GlobPage> {
    let delegate = &self.backend;
    let page = delegate.glob_directory(
        path,
        pattern,
        show_hidden,
        page_size,
        level_limit,
        continuation_token,
    );
    page.await
}
}

fn normalize_path(path: &str) -> String {
Expand Down
27 changes: 26 additions & 1 deletion crates/ragfs/src/core/encryption_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::shape::SHAPE_MANIFEST_PATH;
use super::context::FsContextView;
use super::errors::{Error, Result};
use super::filesystem::{compile_grep_regex, normalize_prefix_path, FileSystem};
use super::types::{FileInfo, GrepResult, TreeEntry, WriteFlag};
use super::types::{FileInfo, GlobPage, GrepResult, TreeEntry, WriteFlag};

const SYSTEM_ACCOUNT_ID: &str = "_system";

Expand Down Expand Up @@ -326,6 +326,31 @@ impl FileSystem for EncryptionWrappedFS {
.collect())
}

/// Glob through the inner filesystem, then drop every entry whose path is
/// recognised by `is_shape_manifest_path` before handing the page back.
///
/// Filtering happens after the inner call has paginated, so a returned page
/// may hold fewer entries than the inner filesystem produced; `next_token`
/// is passed through untouched.
async fn glob_directory(
    &self,
    path: &str,
    pattern: &str,
    show_hidden: bool,
    page_size: Option<usize>,
    level_limit: Option<usize>,
    continuation_token: Option<String>,
) -> Result<GlobPage> {
    let inner_page = self.inner.glob_directory(
        path,
        pattern,
        show_hidden,
        page_size,
        level_limit,
        continuation_token,
    );
    let mut visible = inner_page.await?;

    visible.entries.retain(|candidate| {
        let is_manifest = Self::is_shape_manifest_path(&candidate.path);
        !is_manifest
    });

    Ok(visible)
}

async fn ensure_parent_dirs(&self, path: &str, mode: u32) -> Result<()> {
self.inner.ensure_parent_dirs(path, mode).await
}
Expand Down
199 changes: 198 additions & 1 deletion crates/ragfs/src/core/filesystem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ use regex::Regex;
use std::any::Any;

use super::errors::{Error, Result};
use super::types::{FileInfo, GrepResult, TreeEntry, WriteFlag};
use super::glob::{
compare_rel_paths, decode_offset_token, encode_offset_token, purepath_match, validate_pattern,
};
use super::types::{FileInfo, GlobEntry, GlobPage, GrepResult, TreeEntry, WriteFlag};

/// Normalize a path for prefix comparisons.
///
Expand Down Expand Up @@ -440,6 +443,65 @@ pub trait FileSystem: Send + Sync + Any {
Ok(result)
}

/// Return one page of flat glob results under `path`.
///
/// The default implementation preserves the current Python behavior by
/// reusing `tree_directory()` and matching against the returned `rel_path`
/// values, then slicing matches with an opaque continuation token.
async fn glob_directory(
&self,
path: &str,
pattern: &str,
show_hidden: bool,
page_size: Option<usize>,
level_limit: Option<usize>,
continuation_token: Option<String>,
) -> Result<GlobPage> {
validate_pattern(pattern)?;
if matches!(page_size, Some(0)) {
return Err(Error::invalid_operation("page_size must be positive"));
}

let entries = self
.tree_directory(path, show_hidden, None, level_limit)
.await?;

let mut matched = Vec::new();
for entry in entries {
if purepath_match(&entry.rel_path, pattern)? {
matched.push(GlobEntry {
path: entry.path,
rel_path: entry.rel_path,
name: entry.info.name,
is_dir: entry.info.is_dir,
});
}
}
matched.sort_by(|left, right| compare_rel_paths(&left.rel_path, &right.rel_path));

let start = decode_offset_token(
continuation_token.as_deref(),
path,
pattern,
show_hidden,
level_limit,
)?;
if start > matched.len() {
return Err(Error::invalid_operation("continuation token out of range"));
}
let end = page_size
.map(|limit| start.saturating_add(limit))
.unwrap_or(matched.len())
.min(matched.len());
let next_token = (end < matched.len())
.then(|| encode_offset_token(end, path, pattern, show_hidden, level_limit));

Ok(GlobPage {
entries: matched[start..end].to_vec(),
next_token,
})
}

/// Internal recursive helper for tree_directory.
///
/// # Arguments
Expand Down Expand Up @@ -1029,4 +1091,139 @@ mod tests {
assert!(names.contains(&"secret.txt".to_string()));
assert!(!names.contains(&".hidden_file".to_string()));
}

/// Run `glob_directory` against the fixed query root `/root`, with hidden
/// entries excluded and no level limit.
///
/// Args:
/// - `fs`: The `TreeFS` instance under test.
/// - `pattern`: The glob pattern to match.
/// - `page_size`: The requested page size.
/// - `continuation_token`: The token obtained from a previous page, if any.
///
/// Returns:
/// - The resulting `GlobPage`. The result is unwrapped, so any error makes
///   the calling test panic right away.
async fn root_glob(
    fs: &TreeFS,
    pattern: &str,
    page_size: Option<usize>,
    continuation_token: Option<String>,
) -> crate::core::GlobPage {
    let outcome = fs
        .glob_directory("/root", pattern, false, page_size, None, continuation_token)
        .await;
    outcome.unwrap()
}

/// Collect the `rel_path` of every entry in a `GlobPage`.
///
/// Args:
/// - `page`: The glob page to read relative paths from.
///
/// Returns:
/// - The `rel_path` values in the same order as `page.entries`, ready for
///   content and ordering assertions.
fn glob_rel_paths(page: &crate::core::GlobPage) -> Vec<String> {
    let mut rel_paths = Vec::with_capacity(page.entries.len());
    for entry in &page.entries {
        rel_paths.push(entry.rel_path.clone());
    }
    rel_paths
}

/// A bare `*.md` pattern matches Markdown files at the root and in a
/// subdirectory, and an unpaginated query yields no continuation token.
#[tokio::test]
async fn test_glob_directory_matches_basename_suffix_semantics() {
    let root_entries = vec![("sub", true), ("top.md", false)];
    let sub_entries = vec![("nested.md", false), ("nested.txt", false)];
    let fs = TreeFS::default()
        .with_dir_entries("/root", root_entries)
        .with_dir_entries("/root/sub", sub_entries);

    let page = root_glob(&fs, "*.md", None, None).await;
    let rel_paths = glob_rel_paths(&page);

    assert_eq!(rel_paths, vec!["sub/nested.md", "top.md"]);
    assert!(page.next_token.is_none());
}

/// A multi-segment pattern matches both at the query root and when the same
/// segments appear deeper in the tree (`x/a/b/c.md`).
#[tokio::test]
async fn test_glob_directory_matches_path_suffix_segments() {
    let mut fs = TreeFS::default();
    // Two parallel chains: /root/a/b/c.md and /root/x/a/b/c.md.
    fs = fs.with_dir_entries("/root", vec![("a", true), ("x", true)]);
    fs = fs.with_dir_entries("/root/a", vec![("b", true)]);
    fs = fs.with_dir_entries("/root/a/b", vec![("c.md", false)]);
    fs = fs.with_dir_entries("/root/x", vec![("a", true)]);
    fs = fs.with_dir_entries("/root/x/a", vec![("b", true)]);
    fs = fs.with_dir_entries("/root/x/a/b", vec![("c.md", false)]);

    let page = root_glob(&fs, "a/**/*.md", None, None).await;
    let rel_paths = glob_rel_paths(&page);

    assert_eq!(rel_paths, vec!["a/b/c.md", "x/a/b/c.md"]);
}

/// Three matches with a page size of two arrive as a full first page carrying
/// a token, then a final one-entry page without a token.
#[tokio::test]
async fn test_glob_directory_paginates_with_opaque_offset_tokens() {
    let files = vec![("a.md", false), ("b.md", false), ("c.md", false)];
    let fs = TreeFS::default().with_dir_entries("/root", files);

    let first = root_glob(&fs, "*.md", Some(2), None).await;
    let first_paths = glob_rel_paths(&first);
    assert_eq!(first_paths, vec!["a.md", "b.md"]);
    assert!(first.next_token.is_some());

    // Resume from where the first page stopped.
    let resume_token = first.next_token;
    let second = root_glob(&fs, "*.md", Some(2), resume_token).await;
    let second_paths = glob_rel_paths(&second);
    assert_eq!(second_paths, vec!["c.md"]);
    assert!(second.next_token.is_none());
}

/// A token issued for a `*.md` query must be refused when replayed with a
/// `*.txt` pattern.
#[tokio::test]
async fn test_glob_directory_rejects_token_from_different_query_scope() {
    let files = vec![("a.md", false), ("b.md", false), ("c.md", false)];
    let fs = TreeFS::default().with_dir_entries("/root", files);

    let first = root_glob(&fs, "*.md", Some(2), None).await;
    let foreign_token = first.next_token;

    let outcome = fs
        .glob_directory("/root", "*.txt", false, Some(2), None, foreign_token)
        .await;
    let err = outcome.unwrap_err();

    assert!(matches!(err, Error::InvalidOperation(_)));
}

/// An empty pattern is rejected with `InvalidOperation` even though the
/// directory contains a file.
#[tokio::test]
async fn test_glob_directory_empty_pattern_is_invalid() {
    let files = vec![("a.md", false)];
    let fs = TreeFS::default().with_dir_entries("/root", files);

    let outcome = fs
        .glob_directory("/root", "", false, None, None, None)
        .await;
    let err = outcome.unwrap_err();

    assert!(matches!(err, Error::InvalidOperation(_)));
}

/// An empty pattern is rejected with `InvalidOperation` for an empty
/// directory as well, so the outcome does not depend on there being entries.
#[tokio::test]
async fn test_glob_directory_empty_pattern_is_invalid_for_empty_directory() {
    let fs = TreeFS::default().with_dir_entries("/root", vec![]);

    let outcome = fs
        .glob_directory("/root", "", false, None, None, None)
        .await;
    let err = outcome.unwrap_err();

    assert!(matches!(err, Error::InvalidOperation(_)));
}

/// A page size of zero is rejected with `InvalidOperation`.
#[tokio::test]
async fn test_glob_directory_zero_page_size_is_invalid() {
    let files = vec![("a.md", false)];
    let fs = TreeFS::default().with_dir_entries("/root", files);

    let outcome = fs
        .glob_directory("/root", "*.md", false, Some(0), None, None)
        .await;
    let err = outcome.unwrap_err();

    assert!(matches!(err, Error::InvalidOperation(_)));
}
}
Loading
Loading