-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathScrape.py
More file actions
85 lines (78 loc) · 3.23 KB
/
Scrape.py
File metadata and controls
85 lines (78 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import bs4 as BeautifulSoup # Parses HTMl
import requests # Gets the HTML source code
import json
def Scraping(WebUrl):
    """Collect url, title, lead image url and ingredients for each recipe.

    Fetches ``WebUrl``, appends every link whose href contains "-recipe-" to
    the module-level ``listOfRecipes``, then fetches each listed recipe page
    and stores ``[url, title, lead image url (if found), ingredient, ...]``
    in the module-level ``eachUrl`` dict, keyed by the recipe url.

    Both containers are module-level names that must exist before the call.
    ``listOfRecipes`` is only ever appended to here, so links left over from
    an earlier call are scraped again unless the caller resets the list.

    Args:
        WebUrl: Address of the index page that links to the recipes.
    """
    indexHtml = requests.get(WebUrl).text
    soup = BeautifulSoup.BeautifulSoup(indexHtml, 'html.parser')

    # Find all the links to the recipes and add them to the list.
    for link in soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href yield None, which cannot be searched.
        if href and "-recipe-" in href:
            listOfRecipes.append(href)

    # The last collected link is discarded -- presumably it is not a real
    # recipe page; TODO confirm. Guarded so an index page without any
    # matching link no longer raises IndexError.
    if listOfRecipes:
        listOfRecipes.pop()

    # Visit every recipe and record its title, image url and ingredients.
    for recipe in listOfRecipes:
        recipeHtml = requests.get(recipe).text
        soupIngredient = BeautifulSoup.BeautifulSoup(recipeHtml, 'html.parser')

        recipeTitle = soupIngredient.find("h1", {"class": "heading__title"})
        # A page without the expected heading gets an empty title instead of
        # aborting the whole run with AttributeError.
        titleText = recipeTitle.text if recipeTitle is not None else ""
        listOfIngredients = [recipe, titleText]

        # Only the first image whose src contains "-LEAD-" is kept.
        for imgLink in soupIngredient.find_all('img'):
            image = str(imgLink.get('src'))
            if "-LEAD-" in image:
                listOfIngredients.append(image)
                break

        spans = soupIngredient.find_all(
            "span", attrs={"data-ingredient-name": "true"}
        )
        listOfIngredients.extend(span.text for span in spans)

        eachUrl[recipe] = listOfIngredients
# Index page for every supported meal type, keyed by the name that is also
# used for the output file.
MEAL_TYPE_URLS = {
    "Breakfast": "https://www.simplyrecipes.com/breakfast-recipes-5091541",
    "Lunch": "https://www.simplyrecipes.com/lunch-recipes-5091263",
    "Dinner": "https://www.simplyrecipes.com/dinner-recipes-5091433",
    "Dessert": "https://www.simplyrecipes.com/dessert-recipes-5091513",
    "SnacksAndApps": "https://www.simplyrecipes.com/snacks-and-appetizer-recipes-5090762",
}

# Lower-cased prompt answers mapped to their meal-type name.
PROMPT_ANSWERS = {
    "breakfast": "Breakfast",
    "lunch": "Lunch",
    "dinner": "Dinner",
    "dessert": "Dessert",
    # Misspelling kept so answers that worked before still work.
    "snacks and appitizers": "SnacksAndApps",
    "snacks and appetizers": "SnacksAndApps",
}

mealType = input("What meal type is it? ")
if mealType.lower() in PROMPT_ANSWERS:
    mealType = PROMPT_ANSWERS[mealType.lower()]
    startUrl = MEAL_TYPE_URLS[mealType]
else:
    startUrl = ""
# NOTE(review): the answer above is not used by the export loop below, which
# rebinds both mealType and startUrl -- confirm whether the prompt is still
# wanted.

startIngredients = []
#startIngredients = (input("Which ingredients do you have? ")).split(", ")
listOfRecipes = []
matchingRecipes = {}
#Scraping(startUrl)

# Disabled: adds only the recipes with matching ingredients to the dictionary.
#for recipe in eachUrl:
#    if all(item in eachUrl[recipe] for item in startIngredients):
#        matchingRecipes.update({recipe:eachUrl[recipe]})
#for recipe in matchingRecipes.values():
#    print (recipe)

# Scrape each listed meal type and write its recipes to "<mealType>.json".
mealTypes = ["Dinner", "Dessert", "SnacksAndApps"]
for mealType in mealTypes:
    startUrl = MEAL_TYPE_URLS[mealType]
    # Scraping() appends to listOfRecipes and then walks the whole list, so
    # both containers are reset here; otherwise each file would also contain
    # the recipes of every meal type scraped before it.
    listOfRecipes = []
    eachUrl = {}
    Scraping(startUrl)
    json_object = json.dumps(eachUrl, indent=4)
    with open(mealType + ".json", "w", encoding="utf-8") as outfile:
        outfile.write(json_object)