diff --git a/examples/markdownify/markdownify_scrapegraphai.py b/examples/markdownify/markdownify_scrapegraphai.py index de36607d..66a1596c 100644 --- a/examples/markdownify/markdownify_scrapegraphai.py +++ b/examples/markdownify/markdownify_scrapegraphai.py @@ -1,7 +1,8 @@ """ -Example script demonstrating the markdownify functionality +Example script demonstrating the scrape functionality (v2 API - replaces markdownify) """ +import json import os from dotenv import load_dotenv from scrapegraph_py import Client @@ -20,16 +21,13 @@ def main(): raise ValueError("SCRAPEGRAPH_API_KEY environment variable not found") sgai_client = Client(api_key=api_key) - # Example 1: Convert a website to Markdown - print("Example 1: Converting website to Markdown") + # Scrape a website as markdown (v2 API - replaces markdownify) + print("Scraping website as Markdown") print("-" * 50) - response = sgai_client.markdownify( - website_url="https://example.com" + response = sgai_client.scrape( + url="https://example.com" ) - print("Markdown output:") - print(response["result"]) # Access the result key from the dictionary - print("\nMetadata:") - print(response.get("metadata", {})) # Use get() with default value - print("\n" + "=" * 50 + "\n") + print(json.dumps(response, indent=2)) + if __name__ == "__main__": main() diff --git a/examples/search_graph/scrapegraphai/searchscraper_scrapegraphai.py b/examples/search_graph/scrapegraphai/searchscraper_scrapegraphai.py index e88a92ce..1f48adb6 100644 --- a/examples/search_graph/scrapegraphai/searchscraper_scrapegraphai.py +++ b/examples/search_graph/scrapegraphai/searchscraper_scrapegraphai.py @@ -1,54 +1,14 @@ """ -Example implementation of search-based scraping using Scrapegraph AI. -This example demonstrates how to use the searchscraper to extract information from the web. +Example implementation of search-based scraping using Scrapegraph AI v2 API. +This example demonstrates how to use the search endpoint to extract information from the web. """ +import json import os -from typing import Dict, Any from dotenv import load_dotenv from scrapegraph_py import Client from scrapegraph_py.logger import sgai_logger -def format_response(response: Dict[str, Any]) -> None: - """ - Format and print the search response in a readable way. - - Args: - response (Dict[str, Any]): The response from the search API - """ - print("\n" + "="*50) - print("SEARCH RESULTS") - print("="*50) - - # Print request ID - print(f"\nRequest ID: {response['request_id']}") - - # Print number of sources - urls = response.get('reference_urls', []) - print(f"\nSources Processed: {len(urls)}") - - # Print the extracted information - print("\nExtracted Information:") - print("-"*30) - if isinstance(response['result'], dict): - for key, value in response['result'].items(): - print(f"\n{key.upper()}:") - if isinstance(value, list): - for item in value: - print(f" • {item}") - else: - print(f" {value}") - else: - print(response['result']) - - # Print source URLs - if urls: - print("\nSources:") - print("-"*30) - for i, url in enumerate(urls, 1): - print(f"{i}. {url}") - print("\n" + "="*50) - def main(): # Load environment variables load_dotenv() @@ -65,13 +25,13 @@ def main(): sgai_client = Client(api_key=api_key) try: - # Basic search scraper example + # Search request (v2 API - replaces searchscraper) print("\nSearching for information...") - search_response = sgai_client.searchscraper( - user_prompt="Extract webpage information" + search_response = sgai_client.search( + query="Extract webpage information" ) - format_response(search_response) + print(json.dumps(search_response, indent=2)) except Exception as e: print(f"\nError occurred: {str(e)}") diff --git a/examples/smart_scraper_graph/scrapegraphai/smartscraper_scrapegraphai.py b/examples/smart_scraper_graph/scrapegraphai/smartscraper_scrapegraphai.py index 47181cbb..c78cf50f 100644 --- a/examples/smart_scraper_graph/scrapegraphai/smartscraper_scrapegraphai.py +++ b/examples/smart_scraper_graph/scrapegraphai/smartscraper_scrapegraphai.py @@ -1,7 +1,8 @@ """ -Example implementation using scrapegraph-py client directly. +Example implementation using scrapegraph-py v2 client directly. """ +import json import os from dotenv import load_dotenv from scrapegraph_py import Client @@ -14,7 +15,7 @@ def main(): # Get API key from environment variables api_key = os.getenv("SCRAPEGRAPH_API_KEY") if not api_key: - raise ValueError("SCRAPEGRAPH_API_KEY non trovato nelle variabili d'ambiente") + raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables") # Set up logging sgai_logger.set_logging(level="INFO") @@ -23,17 +24,14 @@ def main(): sgai_client = Client(api_key=api_key) try: - # SmartScraper request - response = sgai_client.smartscraper( - website_url="https://scrapegraphai.com", - user_prompt="Extract the founders' informations" + # Extract request (v2 API - replaces smartscraper) + response = sgai_client.extract( + url="https://scrapegraphai.com", + prompt="Extract the founders' informations" ) # Print the response - print(f"Request ID: {response['request_id']}") - print(f"Result: {response['result']}") - if response.get('reference_urls'): - print(f"Reference URLs: {response['reference_urls']}") + print(json.dumps(response, indent=2)) except Exception as e: print(f"Error occurred: {str(e)}") diff --git a/pyproject.toml b/pyproject.toml index 6537bbcf..5dd2b198 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "jsonschema>=4.25.1", "duckduckgo-search>=8.1.1", "pydantic>=2.12.5", - "scrapegraph-py>=1.44.0", + "scrapegraph-py>=2.0.0", ] readme = "README.md" diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index ffcd3dbe..6790d71b 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -90,18 +90,17 @@ def _create_graph(self) -> BaseGraph: # Initialize the client with explicit API key sgai_client = Client(api_key=self.config.get("api_key")) - # SmartScraper request - response = sgai_client.smartscraper( - website_url=self.source, - user_prompt=self.prompt, + # Extract request (v2 API) + response = sgai_client.extract( + url=self.source, + prompt=self.prompt, ) # Use logging instead of print for better production practices - if "request_id" in response and "result" in response: - logger.info(f"Request ID: {response['request_id']}") - logger.info(f"Result: {response['result']}") - else: - logger.warning("Missing expected keys in response.") + if "id" in response: + logger.info(f"Request ID: {response['id']}") + if "data" in response: + logger.info(f"Result: {response['data']}") sgai_client.close()