-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetTweets.py
More file actions
82 lines (72 loc) · 3.15 KB
/
getTweets.py
File metadata and controls
82 lines (72 loc) · 3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import json
import os
from datetime import date, timedelta

import pandas as pd
# Import the Twython class
from twython import Twython

from regions import regions
# Announce the date window this run covers: the seven days leading up to today.
window_start = date.today() - timedelta(days=7)
window_end = date.today()
print("Tweets will be fetched for the days", str(window_start), "to", str(window_end))
print()

# Read the API keys from the local credentials file.
with open("credentials.json", "r") as creds_file:
    creds = json.load(creds_file)

# Build the Twython client from the consumer key / secret pair.
consumer_key = creds['CONSUMER_KEY']
consumer_secret = creds['CONSUMER_SECRET']
python_tweets = Twython(consumer_key, consumer_secret)
def _extract_tweet(status, region_name):
    """Return the fields of a search-result *status* that are persisted.

    Args:
        status: One entry of the ``statuses`` list returned by the search call.
        region_name: Name of the region whose geocode produced this status.

    Returns:
        A dict with the stored tweet fields; ``bounding_box`` is included only
        when the status carries that key.
    """
    tweet = {
        'id': status['id'],
        'user': status['user']['screen_name'],
        'user_location': status['user']['location'],
        'date': status['created_at'],
        'text': status['text'],
        'geo': status['geo'],
    }
    # NOTE(review): the bounding box may actually live under status['place']
    # rather than at the top level -- confirm against the API's tweet object.
    if 'bounding_box' in status:
        tweet['bounding_box'] = status['bounding_box']
    tweet['lang'] = status['lang']
    tweet['urls'] = status['entities']['urls']
    tweet['mentions'] = status['entities']['user_mentions']
    tweet['region'] = region_name
    return tweet


def _write_region_tweets(lang):
    """Write each region's per-day tweets to ``tweets/<lang>/<region>_<day>.json``."""
    out_dir = os.path.join('tweets', lang)
    # Create the target directory up front so a fresh checkout does not fail
    # on the first open().
    os.makedirs(out_dir, exist_ok=True)
    for region in regions:
        for day, tweets in region['tweets'].items():
            filename = region['name'] + '_' + day
            print(filename, len(tweets))
            path = os.path.join(out_dir, filename + '.json')
            with open(path, 'w', encoding='utf-8') as tweets_file:
                json.dump(tweets, tweets_file, indent=4)


def collect(days, languages):
    """Fetch recent tweets per language, day and region and dump them to JSON.

    For every language, the search endpoint is queried once per day window
    and per entry of the module-level ``regions`` list, going back ``days``
    one-day windows from today.  Only a single request (up to ``count``
    results) is made per window.  The tweets of each (region, day) pair are
    then written to ``tweets/<lang>/<region name>_<since date>.json``; pairs
    without any tweets produce no file.

    Args:
        days: Number of one-day windows to query, counted back from today.
        languages: Iterable of language codes used as the ``lang`` search
            parameter and as the output sub-directory name.

    Side effects:
        ``region['tweets']`` of every region is reset at the start of each
        language and afterwards maps each ``since`` date to the tweets found
        for the most recently processed language.
    """
    # Take "today" once so all windows stay consistent even if the run
    # crosses midnight.
    today = date.today()

    for lang in languages:
        # Start each language from a clean slate; otherwise tweets of the
        # previous language would leak into this language's output files.
        for region in regions:
            region['tweets'] = {}

        for day_i in range(days):
            until = str(today - timedelta(days=day_i))
            since = str(today - timedelta(days=day_i + 1))
            for region in regions:
                # NOTE(review): 'since' may only be supported as an operator
                # inside 'q', not as a request parameter -- confirm that the
                # API honours it here.
                query = {
                    'result_type': 'recent',
                    'count': 100,
                    'lang': lang,
                    'geocode': region['geocode'],
                    'until': until,
                    'since': since,
                }
                print(query)
                print()
                statuses = python_tweets.search(**query)['statuses']
                print('Found %d tweets' % len(statuses))
                if not statuses:
                    # Keep empty windows out of the mapping so no empty file
                    # is written for them.
                    continue
                region['tweets'].setdefault(since, []).extend(
                    _extract_tweet(status, region['name'])
                    for status in statuses
                )

        _write_region_tweets(lang)
if __name__ == "__main__":
    # Harvest English and Arabic tweets for the past week.
    lookback_days, target_languages = 7, ['en', 'ar']
    collect(lookback_days, target_languages)
    print('done')