Skip to content

Commit bf35c80

Browse files
committed
# Release anyparser-core@1.0.1
- Updated branding from "AnyParser" to "Anyparser" throughout the codebase
- Added empty path validation in path validator
- Added version information to package
- Updated documentation with AI-focused benefits and examples
- Updated URLs from app.anyparser.com to studio.anyparser.com
- Added type improvements for crawl directives
- Added support for unavailable_after in crawl directives
- Added text and images fields to AnyparserUrl
1 parent 665a015 commit bf35c80

10 files changed

Lines changed: 471 additions & 15 deletions

File tree

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ https://anyparser.com
1414

1515
**Get Started Quickly:**
1616

17-
1. **Free Access:** Obtain your API credentials and start building your AI data pipelines today at [Anyparser Dashboard](https://app.anyparser.com/).
17+
1. **Free Access:** Obtain your API credentials and start building your AI data pipelines today at [Anyparser Studio](https://studio.anyparser.com/).
1818
2. **Installation:** Install the SDK with a simple pip command.
1919
3. **Run Examples:** Copy and paste the provided examples to see how easy it is to extract data for your AI projects.
2020

21-
Before starting, add a new API key on the [Anyparser Dashboard](https://app.anyparser.com/).
21+
Before starting, add a new API key on the [Anyparser Studio](https://studio.anyparser.com/).
2222

2323

2424
```bash
@@ -230,7 +230,7 @@ from anyparser_core import OcrLanguage, OCRPreset
230230

231231
@dataclass
232232
class AnyparserOption:
233-
"""Configuration options for the AnyParser API."""
233+
"""Configuration options for the Anyparser API."""
234234

235235
# API Configuration
236236
api_url: Optional[str] = None # API endpoint URL, defaults to environment variable ANYPARSER_API_URL

anyparser_core/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
)
1717
from .validator import validate_and_parse, validate_option, validate_path
1818

19+
__version__ = "1.0.1"
1920
__all__ = [
2021
"Anyparser",
2122
"AnyparserCrawlDirective",

anyparser_core/options.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Options module for AnyParser configuration and parsing.
2+
Options module for Anyparser configuration and parsing.
33
"""
44

55
from dataclasses import dataclass, field
@@ -15,7 +15,7 @@
1515

1616
@dataclass
1717
class AnyparserOption:
18-
"""Configuration options for the AnyParser API."""
18+
"""Configuration options for the Anyparser API."""
1919

2020
api_url: Optional[str] = None
2121
api_key: Optional[str] = None

anyparser_core/parser.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
import json
33
import uuid
44
from dataclasses import dataclass, field
5-
from typing import Dict, List, Optional, Union
5+
from typing import Dict, List, Optional, Union, Literal
66
from urllib.parse import urljoin, urlparse
7+
from datetime import datetime
78

89
from .form import build_form
910
from .options import AnyparserOption
@@ -36,22 +37,22 @@ class AnyparserResultBase:
3637
class AnyparserCrawlDirectiveBase:
3738
"""Represents Anyparser crawl directive base with type, priority, name, noindex, nofollow, and crawl delay."""
3839

39-
type: str = field(default="")
40+
type: Literal["HTTP Header", "HTML Meta", "Combined"] = field(default="Combined")
4041
priority: int = field(default=0)
4142
name: Optional[str] = field(default=None)
4243
noindex: Optional[bool] = field(default=False)
4344
nofollow: Optional[bool] = field(default=False)
4445
crawl_delay: Optional[int] = field(default=None)
46+
unavailable_after: Optional[datetime] = field(default=None)
4547

4648

4749
@dataclass
4850
class AnyparserCrawlDirective(AnyparserCrawlDirectiveBase):
4951
"""Represents Anyparser crawl directive with type 'Combined', overriding the name to be None and adding the 'underlying' field."""
5052

5153
underlying: List[AnyparserCrawlDirectiveBase] = field(default_factory=list)
52-
type: str = field(default="Combined")
54+
type: Literal["Combined"] = field(default="Combined")
5355
name: Optional[None] = field(default=None)
54-
# name: None = None
5556

5657

5758
@dataclass
@@ -74,11 +75,11 @@ class AnyparserUrl:
7475
politeness_delay: int = field(default=0)
7576
total_characters: int = field(default=0)
7677
markdown: str = field(default="")
77-
7878
directive: AnyparserCrawlDirective = field(default_factory=AnyparserCrawlDirective)
7979
title: Optional[str] = field(default=None)
8080
crawled_at: Optional[str] = field(default=None)
81-
81+
images: List[AnyparserImageReference] = field(default_factory=list)
82+
text: Optional[str] = field(default=None)
8283

8384
@dataclass
8485
class AnyparserPdfPage:
@@ -111,11 +112,11 @@ class AnyparserCrawlResult:
111112
robots_directive: AnyparserRobotsTxtDirective
112113

113114

114-
AnyparserResult = Union[AnyparserPdfResult, AnyparserCrawlResult]
115+
AnyparserResult = Union[AnyparserPdfResult, AnyparserCrawlResult, AnyparserResultBase]
115116

116117

117118
class Anyparser:
118-
"""Main class for parsing itemss using the AnyParser API."""
119+
"""Main class for parsing itemss using the Anyparser API."""
119120

120121
def __init__(self, options: Optional[AnyparserOption] = None) -> None:
121122
"""Initialize the parser with optional configuration.
@@ -128,7 +129,7 @@ def __init__(self, options: Optional[AnyparserOption] = None) -> None:
128129
async def parse(
129130
self, file_paths_or_url: Union[str, List[str]]
130131
) -> Union[List[AnyparserResult], str]:
131-
"""Parse files using the AnyParser API.
132+
"""Parse files using the Anyparser API.
132133
133134
Args:
134135
file_paths_or_url: A single file path or list of file paths to parse, or a start URL for crawling

anyparser_core/validator/path.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ async def validate_path(file_paths: Union[str, List[str]]) -> PathValidationResu
1616
"""
1717
Validates file paths exist and are accessible
1818
"""
19+
if not file_paths or (isinstance(file_paths, str) and not file_paths.strip()):
20+
return InvalidPathValidationResult(
21+
error=FileNotFoundError("No files provided")
22+
)
23+
24+
1925
if isinstance(file_paths, (str, Path)):
2026
files = [file_paths]
2127
else:

0 commit comments

Comments
 (0)