diff --git a/crates/ov_cli/README.md b/crates/ov_cli/README.md index f1ac7343..133e4120 100644 --- a/crates/ov_cli/README.md +++ b/crates/ov_cli/README.md @@ -114,6 +114,12 @@ ov -o json ls # Compact JSON wrapper for scripts # Add URL and wait for processing ov add-resource https://example.com/docs --wait --timeout 60 +# Add local directory with advanced options +ov add-resource ./dir \ + --wait --timeout 600 \ + --ignore-dirs "subdir-a,subdir-b/subsubdir-c" \ + --exclude "*.tmp,*.log" + # Search with threshold ov find "API authentication" --threshold 0.7 --limit 5 diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index 808d6c6f..63c8bfa4 100644 --- a/crates/ov_cli/src/client.rs +++ b/crates/ov_cli/src/client.rs @@ -425,6 +425,11 @@ impl HttpClient { instruction: &str, wait: bool, timeout: Option<f64>, + strict: bool, + ignore_dirs: Option<String>, + include: Option<String>, + exclude: Option<String>, + directly_upload_media: bool, ) -> Result { let path_obj = Path::new(path); @@ -441,6 +446,11 @@ impl HttpClient { "instruction": instruction, "wait": wait, "timeout": timeout, + "strict": strict, + "ignore_dirs": ignore_dirs, + "include": include, + "exclude": exclude, + "directly_upload_media": directly_upload_media, }); self.post("/api/v1/resources", &body).await @@ -453,6 +463,11 @@ impl HttpClient { "instruction": instruction, "wait": wait, "timeout": timeout, + "strict": strict, + "ignore_dirs": ignore_dirs, + "include": include, + "exclude": exclude, + "directly_upload_media": directly_upload_media, }); self.post("/api/v1/resources", &body).await diff --git a/crates/ov_cli/src/commands/resources.rs b/crates/ov_cli/src/commands/resources.rs index 2ecf5f52..35d5f116 100644 --- a/crates/ov_cli/src/commands/resources.rs +++ b/crates/ov_cli/src/commands/resources.rs @@ -10,11 +10,28 @@ pub async fn add_resource( instruction: String, wait: bool, timeout: Option<f64>, + strict: bool, + ignore_dirs: Option<String>, + include: Option<String>, + exclude: Option<String>, + directly_upload_media: bool, 
format: OutputFormat, compact: bool, ) -> Result<()> { let result = client - .add_resource(path, to, &reason, &instruction, wait, timeout) + .add_resource( + path, + to, + &reason, + &instruction, + wait, + timeout, + strict, + ignore_dirs, + include, + exclude, + directly_upload_media, + ) .await?; output_success(&result, format, compact); Ok(()) diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index 74c71d6f..7c32f446 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -76,7 +76,22 @@ enum Commands { wait: bool, /// Wait timeout in seconds #[arg(long)] - timeout: Option<f64>, + timeout: f64, + /// No strict mode for directory scanning + #[arg(long = "no-strict", default_value_t = false)] + no_strict: bool, + /// Ignore directories, e.g. --ignore-dirs "node_modules,dist" + #[arg(long)] + ignore_dirs: Option<String>, + /// Include files extensions, e.g. --include "*.pdf,*.md" + #[arg(long)] + include: Option<String>, + /// Exclude files extensions, e.g. --exclude "*.tmp,*.log" + #[arg(long)] + exclude: Option<String>, + /// Do not directly upload media files + #[arg(long = "no-directly-upload-media", default_value_t = false)] + no_directly_upload_media: bool, }, /// Add a skill into OpenViking AddSkill { @@ -452,8 +467,34 @@ async fn main() { }; let result = match cli.command { - Commands::AddResource { path, to, reason, instruction, wait, timeout } => { - handle_add_resource(path, to, reason, instruction, wait, timeout, ctx).await + Commands::AddResource { + path, + to, + reason, + instruction, + wait, + timeout, + no_strict, + ignore_dirs, + include, + exclude, + no_directly_upload_media, + } => { + handle_add_resource( + path, + to, + reason, + instruction, + wait, + timeout, + no_strict, + ignore_dirs, + include, + exclude, + no_directly_upload_media, + ctx, + ) + .await } Commands::AddSkill { data, wait, timeout } => { handle_add_skill(data, wait, timeout, ctx).await @@ -544,7 +585,12 @@ async fn handle_add_resource( reason: String, instruction: 
String, wait: bool, - timeout: Option<f64>, + timeout: f64, + no_strict: bool, + ignore_dirs: Option<String>, + include: Option<String>, + exclude: Option<String>, + no_directly_upload_media: bool, ctx: CliContext, ) -> Result<()> { // Validate path: if it's a local path, check if it exists @@ -575,9 +621,25 @@ async fn handle_add_resource( path = unescaped_path; } + let strict = !no_strict; + let directly_upload_media = !no_directly_upload_media; + let client = ctx.get_client(); commands::resources::add_resource( - &client, &path, to, reason, instruction, wait, timeout, ctx.output_format, ctx.compact + &client, + &path, + to, + reason, + instruction, + wait, + Some(timeout), + strict, + ignore_dirs, + include, + exclude, + directly_upload_media, + ctx.output_format, + ctx.compact, ).await } diff --git a/examples/server_client/README.md b/examples/server_client/README.md index 01e47690..fd9e47d6 100644 --- a/examples/server_client/README.md +++ b/examples/server_client/README.md @@ -142,11 +142,13 @@ await client.close() ### CLI ```bash -# CLI 从 ~/.openviking/ovcli.conf 或 $OPENVIKING_CLI_CONFIG_FILE 读取连接配置 +# CLI 从 ~/.openviking/ovcli.conf 或 $s 读取连接配置 # 基本操作 openviking health -openviking add-resource ./document.md +openviking add-resource ./document.md # 上传文件 +openviking add-resource ./dir --exclude "*.tmp,*.log" --ignore-dirs "subdir-a,subdir-b/subsubdir-c" # 上传文件夹 + openviking wait openviking find "search query" diff --git a/openviking/parse/directory_scan.py b/openviking/parse/directory_scan.py index 07b10283..bec7f7fa 100644 --- a/openviking/parse/directory_scan.py +++ b/openviking/parse/directory_scan.py @@ -221,6 +221,17 @@ def scan_directory( include_patterns = _parse_patterns(include) exclude_patterns = _parse_patterns(exclude) + # Normalize ignore_dirs: + # - If caller passed a comma-separated string (common from CLI/HTTP), + # split it into a set of entries. + # - If already a set/list-like, keep as is. 
+ ignore_dirs_set: Optional[Set[str]] + if isinstance(ignore_dirs, str): + entries = _parse_patterns(ignore_dirs) + ignore_dirs_set = set(entries) if entries else None + else: + ignore_dirs_set = ignore_dirs + result = DirectoryScanResult(root=root) for dir_path_str, dir_names, file_names in os.walk(root, topdown=True): dir_path = Path(dir_path_str) @@ -229,7 +240,7 @@ def scan_directory( kept = [] for d in dir_names: sub = dir_path / d - skip, reason = _should_skip_directory(sub, root, ignore_dirs) + skip, reason = _should_skip_directory(sub, root, ignore_dirs_set) if skip: result.skipped.append(f"{sub.relative_to(root)} ({reason})") else: diff --git a/openviking/server/routers/resources.py b/openviking/server/routers/resources.py index 5e850c66..2daaaa48 100644 --- a/openviking/server/routers/resources.py +++ b/openviking/server/routers/resources.py @@ -29,6 +29,11 @@ class AddResourceRequest(BaseModel): instruction: str = "" wait: bool = False timeout: Optional[float] = None + strict: bool = True + ignore_dirs: Optional[str] = None + include: Optional[str] = None + exclude: Optional[str] = None + directly_upload_media: bool = True class AddSkillRequest(BaseModel): @@ -98,6 +103,11 @@ async def add_resource( instruction=request.instruction, wait=request.wait, timeout=request.timeout, + strict=request.strict, + ignore_dirs=request.ignore_dirs, + include=request.include, + exclude=request.exclude, + directly_upload_media=request.directly_upload_media, ) return Response(status="ok", result=result) diff --git a/openviking/utils/media_processor.py b/openviking/utils/media_processor.py index 3e2475bc..2edc005c 100644 --- a/openviking/utils/media_processor.py +++ b/openviking/utils/media_processor.py @@ -60,7 +60,7 @@ async def process( if path.exists(): if path.is_dir(): return await self._process_directory(path, instruction, **kwargs) - return await self._process_file(path, instruction) + return await self._process_file(path, instruction, **kwargs) else: 
logger.warning(f"Path {path} does not exist") raise FileNotFoundError(f"Path {path} does not exist") @@ -103,6 +103,7 @@ async def _process_file( self, file_path: Path, instruction: str, + **kwargs, ) -> ParseResult: """Process file with unified parsing.""" # Check if it's a zip file @@ -111,7 +112,7 @@ async def _process_file( try: with zipfile.ZipFile(file_path, "r") as zipf: zipf.extractall(temp_dir) - return await self._process_directory(temp_dir, instruction) + return await self._process_directory(temp_dir, instruction, **kwargs) finally: pass # Don't delete temp_dir yet, it will be used by TreeBuilder return await parse( diff --git a/openviking_cli/cli/commands/resources.py b/openviking_cli/cli/commands/resources.py index a9bfc28f..4324ad6c 100644 --- a/openviking_cli/cli/commands/resources.py +++ b/openviking_cli/cli/commands/resources.py @@ -22,6 +22,21 @@ def add_resource_command( instruction: str = typer.Option("", help="Additional instruction"), wait: bool = typer.Option(False, "--wait", help="Wait until processing is complete"), timeout: Optional[float] = typer.Option(600.0, help="Wait timeout in seconds"), + no_strict: bool = typer.Option( + False, "--no-strict", help="No strict mode for directory scanning" + ), + ignore_dirs: Optional[str] = typer.Option( + None, "--ignore-dirs", help='Ignore directories, e.g. --ignore-dirs "node_modules,dist"' + ), + include: Optional[str] = typer.Option( + None, "--include", help='Include files extensions, e.g. --include "*.pdf,*.md"' + ), + exclude: Optional[str] = typer.Option( + None, "--exclude", help='Exclude files extensions, e.g. 
--exclude "*.tmp,*.log"' + ), + no_directly_upload_media: bool = typer.Option( + False, "--no-directly-upload-media", help="Do not directly upload media files" + ), ) -> None: """Add resources into OpenViking.""" # Validate path: if it's a local path, check if it exists @@ -85,6 +100,11 @@ def add_resource_command( instruction=instruction, wait=wait, timeout=timeout, + strict=not no_strict, + ignore_dirs=ignore_dirs, + include=include, + exclude=exclude, + directly_upload_media=not no_directly_upload_media, ), ) diff --git a/openviking_cli/client/http.py b/openviking_cli/client/http.py index 3975c28e..f468e6f6 100644 --- a/openviking_cli/client/http.py +++ b/openviking_cli/client/http.py @@ -280,6 +280,11 @@ async def add_resource( instruction: str = "", wait: bool = False, timeout: Optional[float] = None, + strict: bool = True, + ignore_dirs: Optional[str] = None, + include: Optional[str] = None, + exclude: Optional[str] = None, + directly_upload_media: bool = True, ) -> Dict[str, Any]: """Add resource to OpenViking.""" request_data = { @@ -288,6 +293,11 @@ async def add_resource( "instruction": instruction, "wait": wait, "timeout": timeout, + "strict": strict, + "ignore_dirs": ignore_dirs, + "include": include, + "exclude": exclude, + "directly_upload_media": directly_upload_media, } path_obj = Path(path) diff --git a/openviking_cli/client/sync_http.py b/openviking_cli/client/sync_http.py index 27d2a305..1629f4fb 100644 --- a/openviking_cli/client/sync_http.py +++ b/openviking_cli/client/sync_http.py @@ -106,10 +106,27 @@ def add_resource( instruction: str = "", wait: bool = False, timeout: Optional[float] = None, + strict: bool = True, + ignore_dirs: Optional[str] = None, + include: Optional[str] = None, + exclude: Optional[str] = None, + directly_upload_media: bool = True, ) -> Dict[str, Any]: """Add resource to OpenViking.""" return run_async( - self._async_client.add_resource(path, target, reason, instruction, wait, timeout) + self._async_client.add_resource( 
+ path, + target, + reason, + instruction, + wait, + timeout, + strict, + ignore_dirs, + include, + exclude, + directly_upload_media, + ) ) def add_skill( diff --git a/pyproject.toml b/pyproject.toml index ac30a11f..25050f2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,7 @@ openviking-server = "openviking.server.bootstrap:main" [tool.setuptools_scm] write_to = "openviking/_version.py" local_scheme = "no-local-version" +tag_regex = "^(?:[a-zA-Z0-9_]+@)?(?P<version>[0-9]+(?:\\.[0-9]+)*)$" [tool.setuptools.packages.find] where = ["."]