Skip to content
This repository was archived by the owner on Mar 11, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pip install play-scraper
* [search](#search): Fetch applications matching a search query.
* [similar](#similar): Fetch an application's similar apps.
* [categories](#categories): Fetch a list of available categories.
* [reviews](#reviews): Fetch a list of an application's reviews.

#### details

Expand Down Expand Up @@ -273,6 +274,30 @@ Options:
'url': 'https://play.google.com/store/apps/category/ART_AND_DESIGN'}, ...}
```

#### reviews

Fetch a list of an application's reviews.

Options:

* `app_id` the id of the app whose reviews to fetch, e.g. `com.android.chrome` for Google Chrome.
* `page` (default 1) the page number of reviews to fetch; the maximum is 10.

```python
>>> import play_scraper
>>> play_scraper.reviews('com.android.chrome', 1)
[{
'author_image': 'https://lh3.googleusercontent.com/a-/...',
'review_id': 'gp:AOqpTOHu4lr...',
'review_permalink': '/store/apps/details?id=com.android.chrome&reviewId=...',
'author_name': ' Martin Staf ',
'review_date': 'December 5, 2018',
'current_rating': 5,
'review_title': '',
'review_body': " The overflow menu at the bottom ..."
},...]
```

### Tests

Run test:
Expand Down
1 change: 1 addition & 0 deletions play_scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
similar,
suggestions,
categories,
reviews,
)


Expand Down
12 changes: 12 additions & 0 deletions play_scraper/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,15 @@ def categories(hl='en', gl='us', ignore_promotions=True):
"""
s = scraper.PlayScraper(hl, gl)
return s.categories(ignore_promotions)


def reviews(app_id, page=1, hl='en', gl='us'):
    """Fetch one page of reviews for an app through a ``PlayScraper``.

    :param app_id: the app to retrieve reviews for,
        e.g. 'com.android.chrome'
    :param page: the page number to retrieve; max is 10
    :param hl: language code handed to ``PlayScraper`` (default 'en')
    :param gl: country code handed to ``PlayScraper`` (default 'us')
    :return: a list of reviews
    """
    return scraper.PlayScraper(hl, gl).reviews(app_id, page)
56 changes: 56 additions & 0 deletions play_scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import requests
from bs4 import BeautifulSoup, SoupStrainer
import cssutils

from play_scraper import settings as s
from play_scraper.constants import HL_LANGUAGE_CODES, GL_COUNTRY_CODES
Expand Down Expand Up @@ -283,3 +284,58 @@ def categories(self, ignore_promotions=True):
'category_id': category_id}

return categories

def reviews(self, app_id, page=1):
    """Sends a POST request and retrieves a list of reviews for
    the specified app.

    :param app_id: the app to retrieve reviews for,
        e.g. 'com.android.chrome'
    :param page: the page number to retrieve; max is 10
    :return: a list of dicts, one per review, with the keys
        'review_id', 'review_permalink', 'author_name', 'review_date',
        'current_rating', 'review_title' and 'review_body', plus
        'author_image' when an avatar URL could be extracted
    :raises ValueError: if the response does not contain the expected
        review payload
    """
    payload = {
        'reviewType': 0,
        'pageNum': page,
        'id': app_id,
        'reviewSortOrder': 4,
        'xhr': 1,
        'hl': self.language,
    }
    # Build a per-request copy so 'authuser' does not leak into the
    # params shared with the other requests made by this instance.
    params = dict(self.params, authuser='0')

    response = send_request('POST', s.REVIEW_URL, payload, params)
    content = response.text

    # Everything before the '[["ecr"' marker is discarded before parsing.
    start = content.find('[["ecr"')
    if start == -1:
        raise ValueError(
            'Unexpected reviews response for app %s' % app_id)
    parsed = json.loads(content[start:].strip())
    html = parsed[0][2]
    # `html` comes out of json.loads as text, so BeautifulSoup needs no
    # from_encoding hint (it is ignored, with a warning, for text input).
    soup = BeautifulSoup(html, 'lxml')

    results = []
    for element in soup.select('.single-review'):
        review = {}

        # The avatar URL lives inside the inline style of .author-image.
        avatar = element.select_one('.author-image')
        avatar_style = avatar.get('style') if avatar is not None else None
        if avatar_style:
            sheet = cssutils.css.CSSStyleSheet()
            sheet.add('tmp { %s }' % avatar_style)
            avatar_url = next(iter(cssutils.getUrls(sheet)), None)
            if avatar_url is not None:
                review['author_image'] = avatar_url

        review_header = element.select_one('.review-header')
        review['review_id'] = review_header.get('data-reviewid', '')
        review['review_permalink'] = review_header.select_one(
            '.reviews-permalink').get('href')

        review['author_name'] = review_header.select_one('.author-name').text
        review['review_date'] = review_header.select_one('.review-date').text

        # The rating is stored as a CSS width percentage; every 20%
        # counts as one rating point (100% -> 5).
        rating_style = review_header.select_one(
            '.current-rating').get('style')
        width = str(cssutils.parseStyle(rating_style).width)
        review['current_rating'] = int(width.replace('%', '')) // 20

        body_elem = element.select_one('.review-body')
        # Detach the title and drop the link so that neither ends up in
        # the body text; either node may be absent.
        title_elem = body_elem.select_one('.review-title')
        if title_elem is not None:
            title_elem.extract()
        link_elem = body_elem.select_one('.review-link')
        if link_elem is not None:
            link_elem.decompose()
        review['review_title'] = (
            title_elem.text if title_elem is not None else '')
        review['review_body'] = body_elem.text

        results.append(review)

    return results
6 changes: 4 additions & 2 deletions play_scraper/settings.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-

BASE_URL = 'https://play.google.com/store/apps'
PLAYSTORE_URL = 'https://play.google.com/store'
BASE_URL = PLAYSTORE_URL + '/apps'
SUGGESTION_URL = 'https://market.android.com/suggest/SuggRequest'
SEARCH_URL = 'https://play.google.com/store/search'
SEARCH_URL = PLAYSTORE_URL + '/search'
REVIEW_URL = PLAYSTORE_URL + '/getreviews'

CONCURRENT_REQUESTS = 10
USER_AGENT = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) '
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ certifi==2018.4.16
cffi==1.11.5
chardet==3.0.4
cryptography==2.3
cssutils==1.0.2
enum34==1.1.6
futures==3.2.0;python_version<"2.7"
idna==2.7
Expand Down
16 changes: 16 additions & 0 deletions tests/test_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@
'updated',
}

# Keys every scraped review is expected to carry. 'author_image' is left
# out on purpose: the scraper only sets it when an avatar style is present.
REVIEW_KEYS = {
    'author_name',
    'review_id',
    'review_permalink',
    'review_date',
    'current_rating',
    'review_title',
    'review_body',
}

class ScraperTestBase(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -380,3 +388,11 @@ def test_different_language_and_country(self):
categories = s.categories()

self.assertTrue(all(key in categories for key in CATEGORIES))


class ReviewTest(ScraperTestBase):
    """Checks the reviews scraper against a known app id."""

    def test_review_ok(self):
        fetched = self.s.reviews('com.android.chrome')

        # At least one review must come back ...
        self.assertGreater(len(fetched), 0)
        # ... and the first one must expose every expected key.
        first_review = fetched[0]
        self.assertTrue(all(key in first_review for key in REVIEW_KEYS))