-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgoogle_search.py
More file actions
91 lines (67 loc) · 2.59 KB
/
google_search.py
File metadata and controls
91 lines (67 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import http.client
import dotenv
import json
import pprint
import requests
from bs4 import BeautifulSoup
import tiktoken
from newspaper import Article
dotenv.load_dotenv()
serp_api_key = os.getenv("SERPER_API_KEY")
def search_results(payload):
"""
Fetches search results from a Google Search API.
This function sends a POST request to the Google Search API with a given payload.
It then parses the response and extracts the title and link of each organic search result.
Args:
payload (dict): The payload to send in the POST request. This should contain the search parameters.
Returns:
dict: A dictionary where the keys are the titles of the search results and the values are the corresponding links.
"""
url = "https://google.serper.dev/search"
headers = {
'X-API-KEY': serp_api_key,
'Content-Type': 'application/json'
}
response = requests.post(url, json=payload, headers=headers)
data = response.json()
links = {org['title']: org['link'] for org in data['organic']}
return links
# pprint.pprint(search_results('Apple latest product'))
# def extract_webpage_content(url):
# try:
# # Make an HTTP request to the webpage
# response = requests.get(url)
# response.raise_for_status() # Check if the request was successful
# except requests.HTTPError as http_err:
# print(f"HTTP error occurred: {http_err}")
# return None
# except Exception as err:
# print(f"An error occurred: {err}")
# return None
# # Parse the webpage content with Beautiful Soup
# soup = BeautifulSoup(response.content, 'html.parser')
# # Get the text content from the webpage
# text_content = soup.get_text(separator='\n', strip=True)
# return text_content
def extract_webpage_content_efficient(url):
"""
Extracts the main content from a webpage.
This function uses the Newspaper3k library to download and parse the webpage.
It then extracts the main text content from the parsed webpage.
Args:
url (str): The URL of the webpage to extract content from.
Returns:
str: The main text content of the webpage.
"""
article = Article(url)
article.download()
article.parse()
main_text = article.text
return main_text
# page_content =extract_webpage_content_efficient(
# "https://www.zdnet.com/article/every-product-apple-announced-this-week-iphone-15-pro-apple-watch-series-9-airpods/")
# encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
# print(len(encoding.encode(page_content)))
# print(page_content)