-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathsmartScraper.js
More file actions
97 lines (90 loc) · 3.5 KB
/
smartScraper.js
File metadata and controls
97 lines (90 loc) · 3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import axios from 'axios';
import handleError from './utils/handleError.js';
import { ZodType } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
/**
 * Submit a smartscraper request to ScrapeGraph AI for a single webpage.
 *
 * The request body always carries `website_url` and `user_prompt`. When a
 * schema is supplied it is converted with `zodToJsonSchema` and sent as
 * `output_schema`; when a scroll count is supplied it is sent as
 * `number_of_scrolls`.
 *
 * @param {string} apiKey - ScrapeGraph AI API key, sent in the `SGAI-APIKEY` header
 * @param {string} url - Address of the webpage to scrape
 * @param {string} prompt - Natural language description of the data to extract
 * @param {Object} [schema] - Optional Zod schema describing the output structure.
 *   Falsy values are treated as "no schema".
 * @param {number} [numberOfScrolls] - Optional integer in the range 0-100. When
 *   omitted (or `null`) the field is left out of the request.
 * @returns {Promise<*>} The body of the API response (`response.data`)
 * @throws {Error} If `schema` is truthy but not a `ZodType` instance, or if
 *   `numberOfScrolls` is not an integer between 0 and 100. Failures of the HTTP
 *   call itself are passed to `handleError`.
 */
export async function smartScraper(apiKey, url, prompt, schema = null, numberOfScrolls = null) {
  const requestBody = { website_url: url, user_prompt: prompt };

  // Schema is validated first so that its error wins when both optional
  // arguments are invalid.
  if (schema) {
    if (!(schema instanceof ZodType)) {
      throw new Error('The schema must be an instance of a valid Zod schema');
    }
    requestBody.output_schema = zodToJsonSchema(schema);
  }

  if (numberOfScrolls !== null) {
    const isValidScrollCount =
      Number.isInteger(numberOfScrolls) && numberOfScrolls >= 0 && numberOfScrolls <= 100;
    if (!isValidScrollCount) {
      throw new Error('numberOfScrolls must be an integer between 0 and 100');
    }
    requestBody.number_of_scrolls = numberOfScrolls;
  }

  const requestConfig = {
    headers: {
      'accept': 'application/json',
      'SGAI-APIKEY': apiKey,
      'Content-Type': 'application/json',
    },
  };

  try {
    const { data } = await axios.post(
      'https://api.scrapegraphai.com/v1/smartscraper',
      requestBody,
      requestConfig,
    );
    return data;
  } catch (requestError) {
    // Error reporting/translation is owned by the shared helper.
    handleError(requestError);
  }
}
/**
 * Retrieve the status or the result of a smartScraper request. It also allows you to see the result of old requests.
 *
 * The request ID is percent-encoded before being placed in the URL path, so
 * reserved characters such as `/`, `?` or `#` cannot alter the endpoint that
 * is called. IDs made only of unreserved characters (e.g. UUIDs) are sent unchanged.
 *
 * @param {string} apiKey - Your ScrapeGraph AI API key
 * @param {string} requestId - The request ID associated with the output of a smartScraper request.
 * @returns {Promise<Object>} A promise that resolves to the API response body (`response.data`), an object containing:
 *   - status: The current status of the request ('pending', 'completed', 'failed')
 *   - result: The extracted data in JSON format (when status is 'completed')
 *   - error: Error message if the request failed (when status is 'failed')
 *   - created_at: Timestamp of when the request was created
 *   - completed_at: Timestamp of when the request was completed (if applicable)
 * @throws {Error} Throws an error if the HTTP request fails or if the API key is invalid
 *
 * @example
 * // Example usage:
 * const apiKey = 'your-api-key';
 * const requestId = 'previously-obtained-request-id';
 *
 * try {
 *   const result = await getSmartScraperRequest(apiKey, requestId);
 *   if (result.status === 'completed') {
 *     console.log('Extracted data:', result.result);
 *   } else if (result.status === 'pending') {
 *     console.log('Request is still processing');
 *   } else {
 *     console.log('Request failed:', result.error);
 *   }
 * } catch (error) {
 *   console.error('Error fetching request:', error);
 * }
 */
export async function getSmartScraperRequest(apiKey, requestId) {
  // Encode the ID so it is always treated as a single path segment.
  const endpoint = `https://api.scrapegraphai.com/v1/smartscraper/${encodeURIComponent(requestId)}`;
  const headers = {
    'accept': 'application/json',
    'SGAI-APIKEY': apiKey,
  };

  try {
    const response = await axios.get(endpoint, { headers });
    return response.data;
  } catch (error) {
    // Error reporting/translation is owned by the shared helper.
    handleError(error);
  }
}