From 1feea1e41efa6032513846fe3bca9b734e148b70 Mon Sep 17 00:00:00 2001 From: shaoeric Date: Wed, 25 Feb 2026 12:06:25 +0800 Subject: [PATCH 1/5] tests(parsers): add unit tests for office extensions within add_resource directory --- openviking/parse/registry.py | 4 +- tests/parse/test_add_directory.py | 206 +++++++++++++++++++ tests/parse/test_directory_parser_routing.py | 45 ++++ tests/parse/test_directory_scan.py | 13 +- 4 files changed, 266 insertions(+), 2 deletions(-) diff --git a/openviking/parse/registry.py b/openviking/parse/registry.py index 5777edec..77a90395 100644 --- a/openviking/parse/registry.py +++ b/openviking/parse/registry.py @@ -65,8 +65,10 @@ def __init__(self, register_optional: bool = True): self.register("powerpoint", PowerPointParser()) self.register("excel", ExcelParser()) self.register("epub", EPubParser()) - self.register("zip", ZipParser()) + # CodeRepositoryParser also uses .zip; register it before ZipParser + # so that .zip resolves to ZipParser (file) rather than code repo. 
self.register("code", CodeRepositoryParser()) + self.register("zip", ZipParser()) self.register("directory", DirectoryParser()) self.register("image", ImageParser()) diff --git a/tests/parse/test_add_directory.py b/tests/parse/test_add_directory.py index 81fd650b..5c4e884b 100644 --- a/tests/parse/test_add_directory.py +++ b/tests/parse/test_add_directory.py @@ -371,6 +371,212 @@ async def test_txt_file_goes_through_parser(self, tmp_path: Path, parser, fake_f assert result.meta["file_count"] == 1 assert len(fake_fs.files) > 0 + @pytest.mark.asyncio + async def test_docx_file_goes_through_parser(self, tmp_path: Path, parser, fake_fs) -> None: + """Word (.docx) files should be processed by WordParser.parse().""" + (tmp_path / "report.docx").write_bytes(b"PK\x03\x04") + + mock_temp = fake_fs.create_temp_uri() + doc_dir = f"{mock_temp}/report" + await fake_fs.mkdir(mock_temp) + await fake_fs.mkdir(doc_dir) + await fake_fs.write_file(f"{doc_dir}/report.md", "# Converted Word") + + fake_result = create_parse_result( + root=ResourceNode(type=NodeType.ROOT), + source_path=str(tmp_path / "report.docx"), + source_format="docx", + parser_name="WordParser", + parse_time=0.1, + ) + fake_result.temp_dir_path = mock_temp + + with patch( + "openviking.parse.parsers.directory.DirectoryParser._assign_parser", + ) as mock_assign: + from openviking.parse.parsers.word import WordParser as _Word + + mock_word = AsyncMock(spec=_Word) + mock_word.parse = AsyncMock(return_value=fake_result) + + def assign_side_effect(cf, registry): + if cf.path.suffix == ".docx": + return mock_word + return registry.get_parser_for_file(cf.path) + + mock_assign.side_effect = assign_side_effect + await parser.parse(str(tmp_path)) + + dir_name = tmp_path.name + found_md = any( + uri.endswith("report.md") and f"/{dir_name}/" in uri for uri in fake_fs.files + ) + assert found_md, f"report.md not found. 
Files: {list(fake_fs.files.keys())}" + + @pytest.mark.asyncio + async def test_xlsx_file_goes_through_parser(self, tmp_path: Path, parser, fake_fs) -> None: + """Excel (.xlsx) files should be processed by ExcelParser.parse().""" + (tmp_path / "data.xlsx").write_bytes(b"PK\x03\x04") + + mock_temp = fake_fs.create_temp_uri() + doc_dir = f"{mock_temp}/data" + await fake_fs.mkdir(mock_temp) + await fake_fs.mkdir(doc_dir) + await fake_fs.write_file(f"{doc_dir}/data.md", "# Converted Excel") + + fake_result = create_parse_result( + root=ResourceNode(type=NodeType.ROOT), + source_path=str(tmp_path / "data.xlsx"), + source_format="xlsx", + parser_name="ExcelParser", + parse_time=0.1, + ) + fake_result.temp_dir_path = mock_temp + + with patch( + "openviking.parse.parsers.directory.DirectoryParser._assign_parser", + ) as mock_assign: + from openviking.parse.parsers.excel import ExcelParser as _Excel + + mock_excel = AsyncMock(spec=_Excel) + mock_excel.parse = AsyncMock(return_value=fake_result) + + def assign_side_effect(cf, registry): + if cf.path.suffix in {".xlsx", ".xls", ".xlsm"}: + return mock_excel + return registry.get_parser_for_file(cf.path) + + mock_assign.side_effect = assign_side_effect + await parser.parse(str(tmp_path)) + + dir_name = tmp_path.name + found_md = any(uri.endswith("data.md") and f"/{dir_name}/" in uri for uri in fake_fs.files) + assert found_md, f"data.md not found. 
Files: {list(fake_fs.files.keys())}" + + @pytest.mark.asyncio + async def test_epub_file_goes_through_parser(self, tmp_path: Path, parser, fake_fs) -> None: + """EPub (.epub) files should be processed by EPubParser.parse().""" + (tmp_path / "book.epub").write_bytes(b"PK\x03\x04") + + mock_temp = fake_fs.create_temp_uri() + doc_dir = f"{mock_temp}/book" + await fake_fs.mkdir(mock_temp) + await fake_fs.mkdir(doc_dir) + await fake_fs.write_file(f"{doc_dir}/book.md", "# Converted EPub") + + fake_result = create_parse_result( + root=ResourceNode(type=NodeType.ROOT), + source_path=str(tmp_path / "book.epub"), + source_format="epub", + parser_name="EPubParser", + parse_time=0.1, + ) + fake_result.temp_dir_path = mock_temp + + with patch( + "openviking.parse.parsers.directory.DirectoryParser._assign_parser", + ) as mock_assign: + from openviking.parse.parsers.epub import EPubParser as _EPub + + mock_epub = AsyncMock(spec=_EPub) + mock_epub.parse = AsyncMock(return_value=fake_result) + + def assign_side_effect(cf, registry): + if cf.path.suffix == ".epub": + return mock_epub + return registry.get_parser_for_file(cf.path) + + mock_assign.side_effect = assign_side_effect + await parser.parse(str(tmp_path)) + + dir_name = tmp_path.name + found_md = any(uri.endswith("book.md") and f"/{dir_name}/" in uri for uri in fake_fs.files) + assert found_md, f"book.md not found. 
Files: {list(fake_fs.files.keys())}" + + @pytest.mark.asyncio + async def test_pptx_file_goes_through_parser(self, tmp_path: Path, parser, fake_fs) -> None: + """PowerPoint (.pptx) files should be processed by PowerPointParser.parse().""" + (tmp_path / "slides.pptx").write_bytes(b"PK\x03\x04") + + mock_temp = fake_fs.create_temp_uri() + doc_dir = f"{mock_temp}/slides" + await fake_fs.mkdir(mock_temp) + await fake_fs.mkdir(doc_dir) + await fake_fs.write_file(f"{doc_dir}/slides.md", "# Converted PowerPoint") + + fake_result = create_parse_result( + root=ResourceNode(type=NodeType.ROOT), + source_path=str(tmp_path / "slides.pptx"), + source_format="pptx", + parser_name="PowerPointParser", + parse_time=0.1, + ) + fake_result.temp_dir_path = mock_temp + + with patch( + "openviking.parse.parsers.directory.DirectoryParser._assign_parser", + ) as mock_assign: + from openviking.parse.parsers.powerpoint import PowerPointParser as _PPT + + mock_ppt = AsyncMock(spec=_PPT) + mock_ppt.parse = AsyncMock(return_value=fake_result) + + def assign_side_effect(cf, registry): + if cf.path.suffix == ".pptx": + return mock_ppt + return registry.get_parser_for_file(cf.path) + + mock_assign.side_effect = assign_side_effect + await parser.parse(str(tmp_path)) + + dir_name = tmp_path.name + found_md = any( + uri.endswith("slides.md") and f"/{dir_name}/" in uri for uri in fake_fs.files + ) + assert found_md, f"slides.md not found. 
Files: {list(fake_fs.files.keys())}" + + @pytest.mark.asyncio + async def test_zip_file_goes_through_parser(self, tmp_path: Path, parser, fake_fs) -> None: + """ZIP (.zip) files should be processed by ZipParser.parse().""" + (tmp_path / "archive.zip").write_bytes(b"PK\x03\x04") + + mock_temp = fake_fs.create_temp_uri() + doc_dir = f"{mock_temp}/archive" + await fake_fs.mkdir(mock_temp) + await fake_fs.mkdir(doc_dir) + await fake_fs.write_file(f"{doc_dir}/archive.md", "# Converted ZIP") + + fake_result = create_parse_result( + root=ResourceNode(type=NodeType.ROOT), + source_path=str(tmp_path / "archive.zip"), + source_format="zip", + parser_name="ZipParser", + parse_time=0.1, + ) + fake_result.temp_dir_path = mock_temp + + with patch( + "openviking.parse.parsers.directory.DirectoryParser._assign_parser", + ) as mock_assign: + from openviking.parse.parsers.zip_parser import ZipParser as _Zip + + mock_zip = AsyncMock(spec=_Zip) + mock_zip.parse = AsyncMock(return_value=fake_result) + + def assign_side_effect(cf, registry): + if cf.path.suffix == ".zip": + return mock_zip + return registry.get_parser_for_file(cf.path) + + mock_assign.side_effect = assign_side_effect + await parser.parse(str(tmp_path)) + + dir_name = tmp_path.name + found_md = any( + uri.endswith("archive.md") and f"/{dir_name}/" in uri for uri in fake_fs.files + ) + assert found_md, f"archive.md not found. 
Files: {list(fake_fs.files.keys())}" + # --------------------------------------------------------------------------- # Tests: PDF conversion via parser.parse() diff --git a/tests/parse/test_directory_parser_routing.py b/tests/parse/test_directory_parser_routing.py index 349fd2f9..622f772d 100644 --- a/tests/parse/test_directory_parser_routing.py +++ b/tests/parse/test_directory_parser_routing.py @@ -27,10 +27,15 @@ DirectoryScanResult, scan_directory, ) +from openviking.parse.parsers.epub import EPubParser +from openviking.parse.parsers.excel import ExcelParser from openviking.parse.parsers.html import HTMLParser from openviking.parse.parsers.markdown import MarkdownParser from openviking.parse.parsers.pdf import PDFParser +from openviking.parse.parsers.powerpoint import PowerPointParser from openviking.parse.parsers.text import TextParser +from openviking.parse.parsers.word import WordParser +from openviking.parse.parsers.zip_parser import ZipParser from openviking.parse.registry import ParserRegistry # ═══════════════════════════════════════════════════════════════════════════ @@ -66,6 +71,16 @@ def tmp_all_parsers(tmp_path: Path) -> Path: text/ notes.txt -> TextParser log.text -> TextParser + office/ + report.docx -> WordParser + data.xlsx -> ExcelParser + legacy.xls -> ExcelParser + macro.xlsm -> ExcelParser + slides.pptx -> PowerPointParser + books/ + book.epub -> EPubParser + archives/ + bundle.zip -> ZipParser code/ app.py -> text-fallback (is_text_file) main.js -> text-fallback @@ -105,6 +120,19 @@ def tmp_all_parsers(tmp_path: Path) -> Path: (tmp_path / "config" / "data.json").write_text("{}", encoding="utf-8") (tmp_path / "config" / "rules.toml").write_text("[section]", encoding="utf-8") + (tmp_path / "office").mkdir() + (tmp_path / "office" / "report.docx").write_bytes(b"PK\x03\x04") + (tmp_path / "office" / "data.xlsx").write_bytes(b"PK\x03\x04") + (tmp_path / "office" / "legacy.xls").write_bytes(b"\xd0\xcf\x11\xe0") + (tmp_path / "office" / 
"macro.xlsm").write_bytes(b"PK\x03\x04") + (tmp_path / "office" / "slides.pptx").write_bytes(b"PK\x03\x04") + + (tmp_path / "books").mkdir() + (tmp_path / "books" / "book.epub").write_bytes(b"PK\x03\x04") + + (tmp_path / "archives").mkdir() + (tmp_path / "archives" / "bundle.zip").write_bytes(b"PK\x03\x04") + (tmp_path / "unsupported").mkdir() (tmp_path / "unsupported" / "image.bmp").write_bytes(b"BM\x00\x00") (tmp_path / "unsupported" / "archive.rar").write_bytes(b"RAR\x00") @@ -126,6 +154,13 @@ class TestParserSelection: ".pdf": PDFParser, ".txt": TextParser, ".text": TextParser, + ".docx": WordParser, + ".xlsx": ExcelParser, + ".xls": ExcelParser, + ".xlsm": ExcelParser, + ".epub": EPubParser, + ".pptx": PowerPointParser, + ".zip": ZipParser, } # Extensions that are *processable* (via is_text_file) but have no @@ -203,6 +238,11 @@ class TestParserCanParse: (HTMLParser, ["page.html", "site.htm"]), (PDFParser, ["paper.pdf"]), (TextParser, ["notes.txt", "log.text"]), + (WordParser, ["report.docx"]), + (ExcelParser, ["data.xlsx", "legacy.xls", "book.xlsm"]), + (EPubParser, ["book.epub"]), + (PowerPointParser, ["slides.pptx"]), + (ZipParser, ["archive.zip"]), ], ) def test_can_parse_returns_true(self, parser_cls: type, filenames: List[str]) -> None: @@ -219,6 +259,11 @@ def test_can_parse_returns_true(self, parser_cls: type, filenames: List[str]) -> (HTMLParser, ["file.md", "file.pdf", "file.txt"]), (PDFParser, ["file.md", "file.txt", "file.html"]), (TextParser, ["file.md", "file.html", "file.pdf"]), + (WordParser, ["file.pdf", "file.xlsx", "file.txt"]), + (ExcelParser, ["file.docx", "file.pdf", "file.txt"]), + (EPubParser, ["file.pdf", "file.docx", "file.zip"]), + (PowerPointParser, ["file.pdf", "file.docx", "file.txt"]), + (ZipParser, ["file.rar", "file.pdf", "file.docx"]), ], ) def test_can_parse_returns_false_for_wrong_extension( diff --git a/tests/parse/test_directory_scan.py b/tests/parse/test_directory_scan.py index 0a316695..cd440dca 100644 --- 
a/tests/parse/test_directory_scan.py +++ b/tests/parse/test_directory_scan.py @@ -23,10 +23,15 @@ @pytest.fixture def tmp_tree(tmp_path: Path) -> Path: """Create a directory tree with mixed file types for scan tests.""" - # rich (parser exists): .md, .pdf, .html, .txt + # rich (parser exists): .md, .pdf, .html, .txt, .docx, .xlsx, .epub, .pptx, .zip (tmp_path / "readme.md").write_text("# README", encoding="utf-8") (tmp_path / "doc.html").write_text("", encoding="utf-8") (tmp_path / "note.txt").write_text("plain text", encoding="utf-8") + (tmp_path / "report.docx").write_bytes(b"PK\x03\x04") + (tmp_path / "data.xlsx").write_bytes(b"PK\x03\x04") + (tmp_path / "book.epub").write_bytes(b"PK\x03\x04") + (tmp_path / "slides.pptx").write_bytes(b"PK\x03\x04") + (tmp_path / "bundle.zip").write_bytes(b"PK\x03\x04") # text (code/config, no dedicated parser or text parser only): .py, .yaml (tmp_path / "main.py").write_text("print(1)", encoding="utf-8") @@ -118,6 +123,12 @@ def test_processable_includes_parser_files( assert "readme.md" in processable_rel assert "doc.html" in processable_rel assert "note.txt" in processable_rel + # Word, Excel, EPub, PowerPoint, Zip parsers + assert "report.docx" in processable_rel + assert "data.xlsx" in processable_rel + assert "book.epub" in processable_rel + assert "slides.pptx" in processable_rel + assert "bundle.zip" in processable_rel def test_processable_includes_code_or_config( self, tmp_tree: Path, registry: ParserRegistry From fbe83b224f9f796d42dcf49187d0440852394824 Mon Sep 17 00:00:00 2001 From: shaoeric Date: Thu, 26 Feb 2026 17:56:49 +0800 Subject: [PATCH 2/5] feat: enhance add-resource functionality with new options - Updated CLI commands to support additional parameters: --ignore-dirs, --include, --exclude, and --no-directly-upload-media. - Modified resource processing to handle new parameters for directory scanning and file inclusion/exclusion. - Improved README documentation for CLI usage and examples. 
This update allows for more flexible resource management and better control over file uploads. --- examples/server_client/README.md | 6 ++++-- openviking/parse/directory_scan.py | 13 ++++++++++++- openviking/server/routers/resources.py | 10 ++++++++++ openviking/utils/media_processor.py | 5 +++-- openviking_cli/cli/commands/resources.py | 20 ++++++++++++++++++++ openviking_cli/client/http.py | 10 ++++++++++ openviking_cli/client/sync_http.py | 19 ++++++++++++++++++- 7 files changed, 77 insertions(+), 6 deletions(-) diff --git a/examples/server_client/README.md b/examples/server_client/README.md index 764b5fc7..d94066ac 100644 --- a/examples/server_client/README.md +++ b/examples/server_client/README.md @@ -142,11 +142,13 @@ await client.close() ### CLI ```bash -# CLI 从 ~/.openviking/ovcli.conf 或 $OPENVIKING_CLI_CONFIG_FILE 读取连接配置 +# CLI 从 ~/.openviking/ovcli.conf 或 $OPENVIKING_CLI_CONFIG_FILE 读取连接配置 # 基本操作 openviking health -openviking add-resource ./document.md +openviking add-resource ./document.md # 上传文件 +openviking add-resource ./dir --exclude "*.tmp,*.log" --ignore-dirs "subdir-a,subdir-b/subsubdir-c" # 上传文件夹 + openviking wait openviking find "search query" diff --git a/openviking/parse/directory_scan.py b/openviking/parse/directory_scan.py index 07b10283..bec7f7fa 100644 --- a/openviking/parse/directory_scan.py +++ b/openviking/parse/directory_scan.py @@ -221,6 +221,17 @@ def scan_directory( include_patterns = _parse_patterns(include) exclude_patterns = _parse_patterns(exclude) + # Normalize ignore_dirs: + # - If caller passed a comma-separated string (common from CLI/HTTP), + # split it into a set of entries. + # - If already a set/list-like, keep as is. 
+ ignore_dirs_set: Optional[Set[str]] + if isinstance(ignore_dirs, str): + entries = _parse_patterns(ignore_dirs) + ignore_dirs_set = set(entries) if entries else None + else: + ignore_dirs_set = ignore_dirs + result = DirectoryScanResult(root=root) for dir_path_str, dir_names, file_names in os.walk(root, topdown=True): dir_path = Path(dir_path_str) @@ -229,7 +240,7 @@ def scan_directory( kept = [] for d in dir_names: sub = dir_path / d - skip, reason = _should_skip_directory(sub, root, ignore_dirs) + skip, reason = _should_skip_directory(sub, root, ignore_dirs_set) if skip: result.skipped.append(f"{sub.relative_to(root)} ({reason})") else: diff --git a/openviking/server/routers/resources.py b/openviking/server/routers/resources.py index 370612fd..3e8bb291 100644 --- a/openviking/server/routers/resources.py +++ b/openviking/server/routers/resources.py @@ -29,6 +29,11 @@ class AddResourceRequest(BaseModel): instruction: str = "" wait: bool = False timeout: Optional[float] = None + strict: bool = True + ignore_dirs: Optional[str] = None + include: Optional[str] = None + exclude: Optional[str] = None + directly_upload_media: bool = True class AddSkillRequest(BaseModel): @@ -96,6 +101,11 @@ async def add_resource( instruction=request.instruction, wait=request.wait, timeout=request.timeout, + strict=request.strict, + ignore_dirs=request.ignore_dirs, + include=request.include, + exclude=request.exclude, + directly_upload_media=request.directly_upload_media, ) return Response(status="ok", result=result) diff --git a/openviking/utils/media_processor.py b/openviking/utils/media_processor.py index 3e2475bc..2edc005c 100644 --- a/openviking/utils/media_processor.py +++ b/openviking/utils/media_processor.py @@ -60,7 +60,7 @@ async def process( if path.exists(): if path.is_dir(): return await self._process_directory(path, instruction, **kwargs) - return await self._process_file(path, instruction) + return await self._process_file(path, instruction, **kwargs) else: 
logger.warning(f"Path {path} does not exist") raise FileNotFoundError(f"Path {path} does not exist") @@ -103,6 +103,7 @@ async def _process_file( self, file_path: Path, instruction: str, + **kwargs, ) -> ParseResult: """Process file with unified parsing.""" # Check if it's a zip file @@ -111,7 +112,7 @@ async def _process_file( try: with zipfile.ZipFile(file_path, "r") as zipf: zipf.extractall(temp_dir) - return await self._process_directory(temp_dir, instruction) + return await self._process_directory(temp_dir, instruction, **kwargs) finally: pass # Don't delete temp_dir yet, it will be used by TreeBuilder return await parse( diff --git a/openviking_cli/cli/commands/resources.py b/openviking_cli/cli/commands/resources.py index a9bfc28f..4324ad6c 100644 --- a/openviking_cli/cli/commands/resources.py +++ b/openviking_cli/cli/commands/resources.py @@ -22,6 +22,21 @@ def add_resource_command( instruction: str = typer.Option("", help="Additional instruction"), wait: bool = typer.Option(False, "--wait", help="Wait until processing is complete"), timeout: Optional[float] = typer.Option(600.0, help="Wait timeout in seconds"), + no_strict: bool = typer.Option( + False, "--no-strict", help="No strict mode for directory scanning" + ), + ignore_dirs: Optional[str] = typer.Option( + None, "--ignore-dirs", help='Ignore directories, e.g. --ignore-dirs "node_modules,dist"' + ), + include: Optional[str] = typer.Option( + None, "--include", help='Include files extensions, e.g. --include "*.pdf,*.md"' + ), + exclude: Optional[str] = typer.Option( + None, "--exclude", help='Exclude files extensions, e.g. 
--exclude "*.tmp,*.log"' + ), + no_directly_upload_media: bool = typer.Option( + False, "--no-directly-upload-media", help="Do not directly upload media files" + ), ) -> None: """Add resources into OpenViking.""" # Validate path: if it's a local path, check if it exists @@ -85,6 +100,11 @@ def add_resource_command( instruction=instruction, wait=wait, timeout=timeout, + strict=not no_strict, + ignore_dirs=ignore_dirs, + include=include, + exclude=exclude, + directly_upload_media=not no_directly_upload_media, ), ) diff --git a/openviking_cli/client/http.py b/openviking_cli/client/http.py index 13e44b31..a8e632c7 100644 --- a/openviking_cli/client/http.py +++ b/openviking_cli/client/http.py @@ -275,6 +275,11 @@ async def add_resource( instruction: str = "", wait: bool = False, timeout: Optional[float] = None, + strict: bool = True, + ignore_dirs: Optional[str] = None, + include: Optional[str] = None, + exclude: Optional[str] = None, + directly_upload_media: bool = True, ) -> Dict[str, Any]: """Add resource to OpenViking.""" request_data = { @@ -283,6 +288,11 @@ async def add_resource( "instruction": instruction, "wait": wait, "timeout": timeout, + "strict": strict, + "ignore_dirs": ignore_dirs, + "include": include, + "exclude": exclude, + "directly_upload_media": directly_upload_media, } path_obj = Path(path) diff --git a/openviking_cli/client/sync_http.py b/openviking_cli/client/sync_http.py index 40ea2e12..7b7fac41 100644 --- a/openviking_cli/client/sync_http.py +++ b/openviking_cli/client/sync_http.py @@ -88,10 +88,27 @@ def add_resource( instruction: str = "", wait: bool = False, timeout: Optional[float] = None, + strict: bool = True, + ignore_dirs: Optional[str] = None, + include: Optional[str] = None, + exclude: Optional[str] = None, + directly_upload_media: bool = True, ) -> Dict[str, Any]: """Add resource to OpenViking.""" return run_async( - self._async_client.add_resource(path, target, reason, instruction, wait, timeout) + self._async_client.add_resource( + 
path, + target, + reason, + instruction, + wait, + timeout, + strict, + ignore_dirs, + include, + exclude, + directly_upload_media, + ) ) def add_skill( From f2a21d1ff1f8b7bc1db05ba779e3e08db863c32c Mon Sep 17 00:00:00 2001 From: shaoeric Date: Fri, 27 Feb 2026 11:19:18 +0800 Subject: [PATCH 3/5] feat(cli): enhance add-resource command with new options - Added support for additional parameters: --no-strict, --ignore-dirs, --include, --exclude, and --no-directly-upload-media. - Updated the handling of resource addition to accommodate new options for improved directory scanning and file management. - Enhanced README documentation with examples for the new functionality. This update provides users with greater flexibility in managing resources. --- crates/ov_cli/Cargo.toml | 2 + crates/ov_cli/README.md | 6 +++ crates/ov_cli/src/client.rs | 15 ++++++ crates/ov_cli/src/commands/resources.rs | 19 ++++++- crates/ov_cli/src/main.rs | 72 +++++++++++++++++++++++-- 5 files changed, 108 insertions(+), 6 deletions(-) diff --git a/crates/ov_cli/Cargo.toml b/crates/ov_cli/Cargo.toml index 48d3df51..d7f6e56c 100644 --- a/crates/ov_cli/Cargo.toml +++ b/crates/ov_cli/Cargo.toml @@ -1,3 +1,5 @@ +cargo-features = ["edition2024"] + [package] name = "ov_cli" version = "0.2.0" diff --git a/crates/ov_cli/README.md b/crates/ov_cli/README.md index f1ac7343..133e4120 100644 --- a/crates/ov_cli/README.md +++ b/crates/ov_cli/README.md @@ -114,6 +114,12 @@ ov -o json ls # Compact JSON wrapper for scripts # Add URL and wait for processing ov add-resource https://example.com/docs --wait --timeout 60 +# Add local directory with advanced options +ov add-resource ./dir \ + --wait --timeout 600 \ + --ignore-dirs "subdir-a,subdir-b/subsubdir-c" \ + --exclude "*.tmp,*.log" + # Search with threshold ov find "API authentication" --threshold 0.7 --limit 5 diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index 808d6c6f..63c8bfa4 100644 --- a/crates/ov_cli/src/client.rs +++ 
b/crates/ov_cli/src/client.rs @@ -425,6 +425,11 @@ impl HttpClient { instruction: &str, wait: bool, timeout: Option, + strict: bool, + ignore_dirs: Option, + include: Option, + exclude: Option, + directly_upload_media: bool, ) -> Result { let path_obj = Path::new(path); @@ -441,6 +446,11 @@ impl HttpClient { "instruction": instruction, "wait": wait, "timeout": timeout, + "strict": strict, + "ignore_dirs": ignore_dirs, + "include": include, + "exclude": exclude, + "directly_upload_media": directly_upload_media, }); self.post("/api/v1/resources", &body).await @@ -453,6 +463,11 @@ impl HttpClient { "instruction": instruction, "wait": wait, "timeout": timeout, + "strict": strict, + "ignore_dirs": ignore_dirs, + "include": include, + "exclude": exclude, + "directly_upload_media": directly_upload_media, }); self.post("/api/v1/resources", &body).await diff --git a/crates/ov_cli/src/commands/resources.rs b/crates/ov_cli/src/commands/resources.rs index 2ecf5f52..35d5f116 100644 --- a/crates/ov_cli/src/commands/resources.rs +++ b/crates/ov_cli/src/commands/resources.rs @@ -10,11 +10,28 @@ pub async fn add_resource( instruction: String, wait: bool, timeout: Option, + strict: bool, + ignore_dirs: Option, + include: Option, + exclude: Option, + directly_upload_media: bool, format: OutputFormat, compact: bool, ) -> Result<()> { let result = client - .add_resource(path, to, &reason, &instruction, wait, timeout) + .add_resource( + path, + to, + &reason, + &instruction, + wait, + timeout, + strict, + ignore_dirs, + include, + exclude, + directly_upload_media, + ) .await?; output_success(&result, format, compact); Ok(()) diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index 74c71d6f..7c32f446 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -76,7 +76,22 @@ enum Commands { wait: bool, /// Wait timeout in seconds #[arg(long)] - timeout: Option, + timeout: f64, + /// No strict mode for directory scanning + #[arg(long = "no-strict", 
default_value_t = false)] + no_strict: bool, + /// Ignore directories, e.g. --ignore-dirs "node_modules,dist" + #[arg(long)] + ignore_dirs: Option, + /// Include files extensions, e.g. --include "*.pdf,*.md" + #[arg(long)] + include: Option, + /// Exclude files extensions, e.g. --exclude "*.tmp,*.log" + #[arg(long)] + exclude: Option, + /// Do not directly upload media files + #[arg(long = "no-directly-upload-media", default_value_t = false)] + no_directly_upload_media: bool, }, /// Add a skill into OpenViking AddSkill { @@ -452,8 +467,34 @@ async fn main() { }; let result = match cli.command { - Commands::AddResource { path, to, reason, instruction, wait, timeout } => { - handle_add_resource(path, to, reason, instruction, wait, timeout, ctx).await + Commands::AddResource { + path, + to, + reason, + instruction, + wait, + timeout, + no_strict, + ignore_dirs, + include, + exclude, + no_directly_upload_media, + } => { + handle_add_resource( + path, + to, + reason, + instruction, + wait, + timeout, + no_strict, + ignore_dirs, + include, + exclude, + no_directly_upload_media, + ctx, + ) + .await } Commands::AddSkill { data, wait, timeout } => { handle_add_skill(data, wait, timeout, ctx).await @@ -544,7 +585,12 @@ async fn handle_add_resource( reason: String, instruction: String, wait: bool, - timeout: Option, + timeout: f64, + no_strict: bool, + ignore_dirs: Option, + include: Option, + exclude: Option, + no_directly_upload_media: bool, ctx: CliContext, ) -> Result<()> { // Validate path: if it's a local path, check if it exists @@ -575,9 +621,25 @@ async fn handle_add_resource( path = unescaped_path; } + let strict = !no_strict; + let directly_upload_media = !no_directly_upload_media; + let client = ctx.get_client(); commands::resources::add_resource( - &client, &path, to, reason, instruction, wait, timeout, ctx.output_format, ctx.compact + &client, + &path, + to, + reason, + instruction, + wait, + Some(timeout), + strict, + ignore_dirs, + include, + exclude, + 
directly_upload_media, + ctx.output_format, + ctx.compact, ).await } From d66b7ffa8411f1539d8f0f9f268f73cb30662003 Mon Sep 17 00:00:00 2001 From: shaoeric Date: Fri, 27 Feb 2026 11:26:19 +0800 Subject: [PATCH 4/5] chore(cli): remove unused cargo feature for edition 2024 in Cargo.toml --- crates/ov_cli/Cargo.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/ov_cli/Cargo.toml b/crates/ov_cli/Cargo.toml index d7f6e56c..48d3df51 100644 --- a/crates/ov_cli/Cargo.toml +++ b/crates/ov_cli/Cargo.toml @@ -1,5 +1,3 @@ -cargo-features = ["edition2024"] - [package] name = "ov_cli" version = "0.2.0" From 4597f153d931b8896eff66699b142fdf6bb13902 Mon Sep 17 00:00:00 2001 From: shaoeric Date: Fri, 27 Feb 2026 11:30:36 +0800 Subject: [PATCH 5/5] chore(pyproject): update tag_regex in setuptools_scm configuration for version parsing --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index ac30a11f..25050f2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,7 @@ openviking-server = "openviking.server.bootstrap:main" [tool.setuptools_scm] write_to = "openviking/_version.py" local_scheme = "no-local-version" +tag_regex = "^(?:[a-zA-Z0-9_]+@)?(?P<version>[0-9]+(?:\\.[0-9]+)*)$" [tool.setuptools.packages.find] where = ["."]