Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions examples/markdownify/markdownify_scrapegraphai.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""
Example script demonstrating the markdownify functionality
Example script demonstrating the scrape functionality (v2 API - replaces markdownify)
"""

import json
import os
from dotenv import load_dotenv
from scrapegraph_py import Client
Expand All @@ -20,16 +21,13 @@ def main():
raise ValueError("SCRAPEGRAPH_API_KEY environment variable not found")
sgai_client = Client(api_key=api_key)

# Example 1: Convert a website to Markdown
print("Example 1: Converting website to Markdown")
# Scrape a website as markdown (v2 API - replaces markdownify)
print("Scraping website as Markdown")
print("-" * 50)
response = sgai_client.markdownify(
website_url="https://example.com"
response = sgai_client.scrape(
url="https://example.com"
)
print("Markdown output:")
print(response["result"]) # Access the result key from the dictionary
print("\nMetadata:")
print(response.get("metadata", {})) # Use get() with default value
print("\n" + "=" * 50 + "\n")
print(json.dumps(response, indent=2))

if __name__ == "__main__":
main()
54 changes: 7 additions & 47 deletions examples/search_graph/scrapegraphai/searchscraper_scrapegraphai.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,14 @@
"""
Example implementation of search-based scraping using Scrapegraph AI.
This example demonstrates how to use the searchscraper to extract information from the web.
Example implementation of search-based scraping using Scrapegraph AI v2 API.
This example demonstrates how to use the search endpoint to extract information from the web.
"""

import json
import os
from typing import Dict, Any
from dotenv import load_dotenv
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

def format_response(response: Dict[str, Any]) -> None:
    """
    Format and print the search response in a readable way.

    Missing keys are tolerated: an absent ``request_id`` is shown as
    ``N/A``, an absent ``result`` is printed as ``None``, and an absent or
    ``None`` ``reference_urls`` is treated as an empty list, so the function
    never raises ``KeyError`` on a partial response.

    Args:
        response (Dict[str, Any]): The response from the search API
    """
    banner = "=" * 50
    rule = "-" * 30

    print("\n" + banner)
    print("SEARCH RESULTS")
    print(banner)

    # Print request ID (read with .get() so a partial response still renders)
    print(f"\nRequest ID: {response.get('request_id', 'N/A')}")

    # Print number of sources; `or []` also covers an explicit None value
    urls = response.get('reference_urls') or []
    print(f"\nSources Processed: {len(urls)}")

    # Print the extracted information
    print("\nExtracted Information:")
    print(rule)
    result = response.get('result')
    if isinstance(result, dict):
        for key, value in result.items():
            print(f"\n{key.upper()}:")
            if isinstance(value, list):
                # One bullet per list entry
                for item in value:
                    print(f" • {item}")
            else:
                print(f" {value}")
    else:
        print(result)

    # Print source URLs, numbered from 1
    if urls:
        print("\nSources:")
        print(rule)
        for i, url in enumerate(urls, 1):
            print(f"{i}. {url}")
    print("\n" + banner)

def main():
# Load environment variables
load_dotenv()
Expand All @@ -65,13 +25,13 @@ def main():
sgai_client = Client(api_key=api_key)

try:
# Basic search scraper example
# Search request (v2 API - replaces searchscraper)
print("\nSearching for information...")

search_response = sgai_client.searchscraper(
user_prompt="Extract webpage information"
search_response = sgai_client.search(
query="Extract webpage information"
)
format_response(search_response)
print(json.dumps(search_response, indent=2))

except Exception as e:
print(f"\nError occurred: {str(e)}")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""
Example implementation using scrapegraph-py client directly.
Example implementation using scrapegraph-py v2 client directly.
"""

import json
import os
from dotenv import load_dotenv
from scrapegraph_py import Client
Expand All @@ -14,7 +15,7 @@ def main():
# Get API key from environment variables
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
if not api_key:
raise ValueError("SCRAPEGRAPH_API_KEY non trovato nelle variabili d'ambiente")
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")

# Set up logging
sgai_logger.set_logging(level="INFO")
Expand All @@ -23,17 +24,14 @@ def main():
sgai_client = Client(api_key=api_key)

try:
# SmartScraper request
response = sgai_client.smartscraper(
website_url="https://scrapegraphai.com",
user_prompt="Extract the founders' informations"
# Extract request (v2 API - replaces smartscraper)
response = sgai_client.extract(
url="https://scrapegraphai.com",
prompt="Extract the founders' informations"
)

# Print the response
print(f"Request ID: {response['request_id']}")
print(f"Result: {response['result']}")
if response.get('reference_urls'):
print(f"Reference URLs: {response['reference_urls']}")
print(json.dumps(response, indent=2))

except Exception as e:
print(f"Error occurred: {str(e)}")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"jsonschema>=4.25.1",
"duckduckgo-search>=8.1.1",
"pydantic>=2.12.5",
"scrapegraph-py>=1.44.0",
"scrapegraph-py>=2.0.0",
]

readme = "README.md"
Expand Down
17 changes: 8 additions & 9 deletions scrapegraphai/graphs/smart_scraper_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,18 +90,17 @@ def _create_graph(self) -> BaseGraph:
# Initialize the client with explicit API key
sgai_client = Client(api_key=self.config.get("api_key"))

# SmartScraper request
response = sgai_client.smartscraper(
website_url=self.source,
user_prompt=self.prompt,
# Extract request (v2 API)
response = sgai_client.extract(
url=self.source,
prompt=self.prompt,
)

# Use logging instead of print for better production practices
if "request_id" in response and "result" in response:
logger.info(f"Request ID: {response['request_id']}")
logger.info(f"Result: {response['result']}")
else:
logger.warning("Missing expected keys in response.")
if "id" in response:
logger.info(f"Request ID: {response['id']}")
if "data" in response:
logger.info(f"Result: {response['data']}")

sgai_client.close()

Expand Down
Loading