"""Collect product links from a Flipkart search-results page.

The script fetches the search page for ``SEARCH_QUERY``, saves the raw HTML
to ``DUMP_PATH`` so the markup can be inspected, and gathers the ``href`` of
the first link found inside every product-card ``<div>``.

Provenance: reconstructed from a two-commit patch series by Prince Shrivastav
touching "Scraping Flipkart Websit.py":

* 633f7d5 (2022-10-05) "Create Scraping Flipkart Websit.py" - a Selenium
  based scraper that wrote ``products.csv``.
* 1d1fe30 (2022-10-06) "Update Scraping Flipkart Websit.py" - commented the
  Selenium scraper out and added the ``requests`` based link collector.
"""

# gedit fk-web-s.py

# ``time``, ``pandas`` and ``selenium`` are not used by the live code; they
# are kept because the original script imported them (``pandas`` and
# ``selenium`` are only referenced by the disabled scraper below).
import sys
import time
from typing import List

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver

# --- Disabled Selenium scraper, retained verbatim from the original --------
# driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver")
# products=[] #List to store name of the product
# prices=[] #List to store price of the product
# ratings=[] #List to store rating of the product
# driver.get("https://www.flipkart.com/laptops/~buyback-guarantee-on-laptops-/pr?sid=6bo%2Cb5g&amp;amp;amp;amp;amp;amp;amp;amp;amp;uniq")
# content = driver.page_source
# soup = BeautifulSoup(content)
# for a in soup.findAll('a',href=True, attrs={'class':'_31qSD5'}):
# name=a.find('div', attrs={'class':'_3wU53n'})
# price=a.find('div', attrs={'class':'_1vC4OE _2rQ-NK'})
# rating=a.find('div', attrs={'class':'hGSR34 _2beYZw'})
# products.append(name.text)
# prices.append(price.text)
# ratings.append(rating.text)
# df = pd.DataFrame({'Product Name':products,'Price':prices,'Rating':ratings})
# df.to_csv('products.csv', index=False, encoding='utf-8')
# ---------------------------------------------------------------------------

SEARCH_URL = 'http://www.flipkart.com/search'
SEARCH_QUERY = 'laptop'
DUMP_PATH = "flipkartdata.txt"
# CSS class of the <div> wrapping one search result; replace it with the
# class used by the search you want to scrape.
PRODUCT_CARD_CLASS = "_2kHMtA"
# requests waits indefinitely unless a timeout is given.
REQUEST_TIMEOUT_SECONDS = 30


def fetch_search_html(query: str = SEARCH_QUERY,
                      timeout: float = REQUEST_TIMEOUT_SECONDS) -> str:
    """Download the search-results page for *query* and return it as text.

    Args:
        query: Search term sent as the ``q`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.RequestException: On connection problems or timeout.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    response = requests.get(SEARCH_URL, params={'q': query}, timeout=timeout)
    if not response.ok:
        # Keep going so the error page is still dumped for inspection, but
        # do not let a failed request pass unnoticed.
        print(
            f"warning: {response.url} returned HTTP {response.status_code}",
            file=sys.stderr,
        )
    return response.content.decode(encoding='UTF-8')


def save_html(html: str, path: str = DUMP_PATH) -> None:
    """Write *html* to *path* as UTF-8, replacing any existing file."""
    # The context manager guarantees the file is flushed and closed.
    with open(path, "w", encoding='UTF-8') as dump_file:
        dump_file.write(html)


def extract_product_links(html: str,
                          card_class: str = PRODUCT_CARD_CLASS) -> List[str]:
    """Return the ``href`` of the first link inside each product card.

    Args:
        html: Markup of a search-results page.
        card_class: CSS class identifying a product-card ``<div>``.

    Returns:
        One ``href`` value per card that contains an ``<a>`` with an
        ``href`` attribute, in document order. Cards without such a link
        are skipped.
    """
    soup = BeautifulSoup(html, "lxml")
    links = []
    for card in soup.find_all("div", {"class": card_class}):
        # href=True restricts the match to anchors that carry the attribute;
        # find() returns None when the card has no such anchor.
        anchor = card.find("a", href=True)
        if anchor is not None:
            links.append(anchor['href'])
    return links


def main() -> List[str]:
    """Fetch the search page, dump it to disk and return the product links."""
    html = fetch_search_html()
    save_html(html)
    return extract_product_links(html)


if __name__ == "__main__":
    # The collected links can be stored in whatever form you need.
    href = main()