From 0ae9c5f91ee1a5ed8ed6ed6c1501c57df1ef3bb6 Mon Sep 17 00:00:00 2001 From: Jamsheed Shorish Date: Tue, 20 Jan 2015 14:14:00 +0000 Subject: [PATCH] The python3 branch contains updated code to run on Python 3.4 (the latest version as of January 2015). This branch has been successfully used with Python 3.4 to create, submit, list, and receive jobs to/from Gnip Historical Powertrack. Data files were also successfully downloaded. No structural changes were made to the Python 2.x code--code was simply updated for use with Python 3. --- src/README | 0 src/accept_job.py | 6 +- src/create_job.py | 14 ++-- src/gnip_historical/__init__.py | 1 + src/gnip_historical/gnip_historical.py | 80 +++++++++++----------- src/gnip_historical/gnip_historical_cmd.py | 27 ++++---- src/gnip_historical/gnip_historical_job.py | 10 +-- src/list_jobs.py | 22 +++--- src/name_mangle.py | 2 +- src/reject_job.py | 6 +- src/setup.py | 2 +- src/setup_gnip_creds.py | 52 +++++++------- 12 files changed, 112 insertions(+), 110 deletions(-) create mode 100644 src/README diff --git a/src/README b/src/README new file mode 100644 index 0000000..e69de29 diff --git a/src/accept_job.py b/src/accept_job.py index d9b4e17..ac44784 100755 --- a/src/accept_job.py +++ b/src/accept_job.py @@ -3,8 +3,8 @@ class AcceptJob(GnipHistoricalCmd): def __call__(self): if self.userUrl is None: - print "Please provide a job URL. Use accept_job.py -h for more information." + print("Please provide a job URL. Use accept_job.py -h for more information.") else: - print "RESULT:" - print str(self.gnipHistorical.acceptJob(self.userUrl)) + print ("RESULT:") + print (str(self.gnipHistorical.acceptJob(self.userUrl))) AcceptJob()() diff --git a/src/create_job.py b/src/create_job.py index 19d9226..bf29d58 100755 --- a/src/create_job.py +++ b/src/create_job.py @@ -10,16 +10,16 @@ def setOptions(self, parser): def __call__(self): if self.options.fileName is None: - print "Please provide a job description file. Use create_job.py -h for more information." + print("Please provide a job description file. Use create_job.py -h for more information.") else: self.gnipHistorical.jobPars = JobParameters(self.options.title, jobFileName = self.options.fileName) - print "#"*35 - print "CREATING JOB: (%s)"%self.gnipHistorical.jobPars.getTitle() - print "PARAMETERS:" - print str(self.gnipHistorical.jobPars) - print "RESPONSE:" + print("#"*35) + print("CREATING JOB: (%s)"%self.gnipHistorical.jobPars.getTitle()) + print("PARAMETERS:") + print(str(self.gnipHistorical.jobPars)) + print("RESPONSE:") res = self.gnipHistorical.createJob() - print str(res) + print(str(res)) if res.jobURL is not None: self.updateURLConfig(url = res.jobURL) diff --git a/src/gnip_historical/__init__.py b/src/gnip_historical/__init__.py index e69de29..343d16b 100644 --- a/src/gnip_historical/__init__.py +++ b/src/gnip_historical/__init__.py @@ -0,0 +1 @@ +__all__ = ["gnip_historical", "gnip_historical_cmd", "gnip_historical_job"] \ No newline at end of file diff --git a/src/gnip_historical/gnip_historical.py b/src/gnip_historical/gnip_historical.py index c306108..8033a82 100755 --- a/src/gnip_historical/gnip_historical.py +++ b/src/gnip_historical/gnip_historical.py @@ -3,11 +3,11 @@ import json import sys import datetime -from gnip_historical_job import * +from gnip_historical.gnip_historical_job import * class DataSetResults(object): def __init__(self, resDict): - #print resDict.keys() + #print(resDict.keys()) if "urlList" in resDict: self.dataURLs = resDict["urlList"] elif "url_list" in resDict: @@ -32,11 +32,11 @@ def __init__(self, resDict): self.suspectMinuteURLs = [] def write(self): - with open("./data_files.txt", "wb") as f: + with open("./data_files.txt", "w") as f: for i in self.dataURLs: f.write("%s\n"%i) - if self.suspectMinuteURLs <> []: - with open("./suspect_files.txt", "wb") as f: + if self.suspectMinuteURLs != []: + with open("./suspect_files.txt", "w") as f: for i in self.suspectMinuteURLs: f.write("%s\n"%i) @@ -64,7 +64,7 @@ def __repr__(self): # class Result(object): def __init__(self, resDict, gnipHist): - #print str(resDict) + #print(str(resDict)) self.completedAt = datetime.datetime.strptime(DATE_RE.search(resDict["completedAt"]).group(0),DATEFMT) try: self.activityCount = int(resDict["activityCount"]) @@ -244,9 +244,9 @@ def xJob(self, jobURL, payload): s.auth = (self.user_name, self.password) s.headers = {'content-type':'application/json'} res = s.put(jobURL, data=json.dumps(payload)) - except requests.exceptions.ConnectionError, e: + except requests.exceptions.ConnectionError as e: print >> sys.stderr, "Server request failed with message {}".format(e) - except requests.exceptions.HTTPError, e: + except requests.exceptions.HTTPError as e: print >> sys.stderr, "Server request failed with message {}".format(e) if res is not None and res.status_code == 200: return "Job {}ed successfully".format(payload["status"]) @@ -260,10 +260,10 @@ def createJob(self): s.auth = (self.user_name, self.password) s.headers = {'content-type':'application/json'} res = s.post(self.baseUrl + "jobs.json", data=str(self.jobPars)) - except requests.exceptions.ConnectionError, e: - print >> sys.stderr, "Server request failed with message {}".format(e) - except requests.exceptions.HTTPError, e: - print >> sys.stderr, "Server request failed with message {}".format(e) + except requests.exceptions.ConnectionError as e: + print("Server request failed with message {}".format(e), file=sys.stderr) + except requests.exceptions.HTTPError as e: + print("Server request failed with message {}".format(e), file=sys.stderr) return Status(res.json()) def listJobs(self): @@ -274,10 +274,10 @@ def listJobs(self): s = requests.Session() s.auth = (self.user_name, self.password) res = s.get(self.baseUrl + "jobs.json") - except requests.exceptions.ConnectionError, e: - print >> sys.stderr, "Server request failed with message {}".format(e) - except requests.exceptions.HTTPError, e: - print >> sys.stderr, "Server request failed with message {}".format(e) + except requests.exceptions.ConnectionError as e: + print("Server request failed with message {}".format(e), file=sys.stderr) + except requests.exceptions.HTTPError as e: + print("Server request failed with message {}".format(e), file=sys.stderr) if res is not None and "jobs" in res.json(): for x in res.json()["jobs"]: yield Status(x) @@ -293,10 +293,10 @@ def getDataURLDict(self, URL): s = requests.Session() s.auth = (self.user_name, self.password) res = s.get(URL) - except requests.exceptions.ConnectionError, e: - print >> sys.stderr, "Server request failed with message {}".format(e) - except requests.exceptions.HTTPError, e: - print >> sys.stderr, "Server request failed with message {}".format(e) + except requests.exceptions.ConnectionError as e: + print("Server request failed with message {}".format(e), file=sys.stderr) + except requests.exceptions.HTTPError as e: + print("Server request failed with message {}".format(e), file=sys.stderr) if res is not None: return res.json() else: @@ -317,10 +317,10 @@ def getJobStatusDict(self, jobURL = None): s = requests.Session() s.auth = (self.user_name, self.password) res = s.get(jobURL) - except requests.exceptions.ConnectionError, e: - print >> sys.stderr, "Server request failed with message {}".format(e) - except requests.exceptions.HTTPError, e: - print >> sys.stderr, "Server request failed with message {}".format(e) + except requests.exceptions.ConnectionError as e: + print("Server request failed with message {}".format(e), file=sys.stderr) + except requests.exceptions.HTTPError as e: + print("Server request failed with message {}".format(e), file=sys.stderr) if res is not None: return res.json() else: @@ -370,15 +370,15 @@ def results(self, jobURL = None): jp.setToDate("2012-01-01T00:01:00") tmp = jp.getToDate() jp.setToDate("201201010001") # same as above - print jp.getToDate(), "=", tmp + print(jp.getToDate(), "=", tmp) jp.setUser("DrSkippy27") jp.addRule("bieber", "bestRuleEver") # job json as string - print jp + print(jp) # job json as dict pprint(jp.job) - print "Job duration = ",jp.duration().seconds - print + print("Job duration = ",jp.duration().seconds) + print() # Example 2 # save job description in file jp.writeToFile("./bieber_job1.json") @@ -386,34 +386,34 @@ def results(self, jobURL = None): # read job description from file jp1 = JobParameters("BieberJob2", jobFileName = "./FileMissing.JSON") # this file doesn't exist jp1 = JobParameters("BieberJob2", jobFileName = "./bieber_job1.json") - print jp1 - print + print(jp1) + print() # mess it up jp1.setFromDate("2012-01-01T00:02:00") try: - print jp1 # error - except ValueError, e: - print e - print + print(jp1) # error + except ValueError as e: + print(e) + print() # Example 4 # working with rules jp3 = JobParameters("BieberJob2", jobFileName = "./bieber_job1.json") jp3.setRules([{"value": "no bieber"}]) - print jp3 + print(jp3) jp3.addRule("belieber") - print jp3 + print(jp3) jp3.setRules('[{value":"one"}]') # error this is missing a quote jp3.setRules('[{"value":"one"}]') - print jp3 + print(jp3) #################################### # Historical 1 - Change if you want to hit the server # r = GnipHistorical("user", "password", "https://historical.gnip.com/accounts/", jp) # Creates a job - # print r.createJob() + # print(r.createJob()) try: r.acceptJob("not a URL") # error - except ValueError,e: - print e + except ValueError as e: + print(e) # r.rejectJob("not a URL") # error # r.jobStatus("not a URL") # error # r.jobs() # get a list of jobs from the gnip server diff --git a/src/gnip_historical/gnip_historical_cmd.py b/src/gnip_historical/gnip_historical_cmd.py index e1ff5fd..c925cb4 100755 --- a/src/gnip_historical/gnip_historical_cmd.py +++ b/src/gnip_historical/gnip_historical_cmd.py @@ -1,18 +1,18 @@ #!/usr/bin/env python -import ConfigParser +import configparser from optparse import OptionParser -from gnip_historical import * +from gnip_historical.gnip_historical import * DEFAULT_FILE_NAME='./.gnip' class GnipHistoricalCmd(object): def __init__(self, jobPar=None): - self.config = ConfigParser.ConfigParser() + self.config = configparser.ConfigParser() self.config.read(DEFAULT_FILE_NAME) - un = self.config.get('creds', 'un') - pwd = self.config.get('creds', 'pwd') - endURL = self.config.get('endpoint', 'url') - self.prevurl = self.config.get('tmp','prevUrl') + un = self.config['creds']['un'] + pwd = self.config['creds']['pwd'] + endURL = self.config['endpoint']['url'] + self.prevurl = self.config['tmp']['prevUrl'] parser = OptionParser() parser.add_option("-u", "--url", dest="url", default=None, help="Job url.") @@ -30,16 +30,17 @@ def setOptions(self, parser): # help="Use the prev Job URL.") pass - def updateURLConfig(self, url = None): + def updateURLConfig(self, url = 'None'): if self.options.prevUrl: self.userUrl = self.prevurl - elif self.options.url is not None: + elif self.options.url is not 'None': self.userUrl = self.options.url - elif url is not None: + elif url is not 'None': self.userUrl = url else: - self.userUrl = None - self.config.set('tmp','prevUrl',self.userUrl) - with open(DEFAULT_FILE_NAME, 'wb') as self.configfile: + self.userUrl = 'None' + print(self.userUrl) + self.config['tmp']['prevUrl'] = str(self.userUrl) + with open(DEFAULT_FILE_NAME, 'w') as self.configfile: self.config.write(self.configfile) diff --git a/src/gnip_historical/gnip_historical_job.py b/src/gnip_historical/gnip_historical_job.py index 894f13f..c40c0c4 100755 --- a/src/gnip_historical/gnip_historical_job.py +++ b/src/gnip_historical/gnip_historical_job.py @@ -32,7 +32,7 @@ def __init__(self, title, jobDict = None, jobFileName = None): elif jobFileName is not None: # lastly, try to read json from file try: - with codecs.open(jobFileName,"rb","utf-8") as tmpJobFile: + with codecs.open(jobFileName,"r","utf-8") as tmpJobFile: tmp = tmpJobFile.read() try: tmpJob = json.loads(tmp) @@ -40,12 +40,12 @@ def __init__(self, title, jobDict = None, jobFileName = None): if test_key not in tmpJob: raise ValueError("Required fields missing ({})".format(test_key)) self.job = tmpJob - except ValueError, e: + except ValueError as e: sys.stderr.write("Failed to parse input JSON. (%s). Exiting.\n"%e) sys.exit() self.setToDate(tmpJob["toDate"]) self.setFromDate(tmpJob["fromDate"]) - except IOError,e: + except IOError as e: sys.stderr.write("Failed to open rules file. (%s)\n"%e) # Given title supercedes file title, otherwise, use give title if title is not None: @@ -53,7 +53,7 @@ def __init__(self, title, jobDict = None, jobFileName = None): def writeToFile(self, jobFileName): """Write current configuration as a job file""" - with codecs.open(jobFileName,"wb","utf-8") as tmpJobFile: + with codecs.open(jobFileName,"w","utf-8") as tmpJobFile: tmpJobFile.write(str(self)) def setTitle(self, t): @@ -117,7 +117,7 @@ def setRules(self, ruleList): elif type(ruleList) == type("string"): try: self.job["rules"] = json.loads(ruleList) - except ValueError, e: + except ValueError as e: sys.stderr.write("Failed to set rules by parsing JSON string. (%s)\n"%e) else: sys.stderr.write("Failed to set rules. Check argument type is list of valid rules or string with valid JSON.\n") diff --git a/src/list_jobs.py b/src/list_jobs.py index f8cddec..b22e0ce 100755 --- a/src/list_jobs.py +++ b/src/list_jobs.py @@ -18,24 +18,24 @@ def output(self, status): else: if self.options.verbose: status = self.gnipHistorical.getJobStatus(status.jobURL) - print str(status) + print(str(status)) if status.result is not None: status.result.write() else: - print "#"*25 - print "TITLE: ",status.title - print "STATUS: ",status.status - print "PROGRESS: ",status.percentComplete,"%" - print "JOB URL: ",status.jobURL + print("#"*25) + print("TITLE: ",status.title) + print("STATUS: ",status.status) + print("PROGRESS: ",status.percentComplete,"%") + print("JOB URL: ",status.jobURL) if self.options.prevUrl or self.options.url is not None: if status.result is not None: - print - print str(status.quote) - print str(status.result) - print "Writing files to data_files.txt..." + print() + print(str(status.quote)) + print(str(status.result)) + print("Writing files to data_files.txt...") status.result.write() elif status.status.lower().startswith("delivered"): - print 'Data files available, use "-v, -u or -l" flag to download files list.' + print('Data files available, use "-v, -u or -l" flag to download files list.') def __call__(self): if self.userUrl is None: diff --git a/src/name_mangle.py b/src/name_mangle.py index 3789e79..3d62296 100755 --- a/src/name_mangle.py +++ b/src/name_mangle.py @@ -8,4 +8,4 @@ # https://archive.replay.snapshots.review.s3.amazonaws.com/snapshots/twitter/track/activity-streams/shendrickson/2012/08/13/20090101-20100101_c9pe0day6h/2009/12/31/23/50_activities.json.gz?AWSAccessKeyId=AKIAI3ZYYXK57KIWDGHQ&Expires=1347654202&Signature=ej8iMVWVfYZE6qVGi%2FU%2FY5clnb0%3D infile = line.split("?")[0] # https://archive.replay.snapshots.review.s3.amazonaws.com/snapshots/twitter/track/activity-streams/shendrickson/2012/08/13/20090101-20100101_c9pe0day6h/2009/12/31/23/50_activities.json.gz - print dateRE.search(infile).group(0) + nameRE.search(infile).group(0) + ".json.gz" + print(dateRE.search(infile).group(0) + nameRE.search(infile).group(0) + ".json.gz") diff --git a/src/reject_job.py b/src/reject_job.py index 2a554e5..273691d 100755 --- a/src/reject_job.py +++ b/src/reject_job.py @@ -3,8 +3,8 @@ class RejectJob(GnipHistoricalCmd): def __call__(self): if self.userUrl is None: - print "Please provide a job URL. Use reject_job.py -h for more information." + print("Please provide a job URL. Use reject_job.py -h for more information.") else: - print "RESULT:" - print str(self.gnipHistorical.rejectJob(self.userUrl)) + print("RESULT:") + print(str(self.gnipHistorical.rejectJob(self.userUrl))) RejectJob()() diff --git a/src/setup.py b/src/setup.py index e4f4237..bbae32b 100644 --- a/src/setup.py +++ b/src/setup.py @@ -10,6 +10,6 @@ url='http://pypi.python.org/pypi/gnip-historical/', license='LICENSE.txt', description='Gnip Historical libarary and command scripts.', - install_requires=["requests > 1.2.2"], + requires=["requests(>1.2.2)"], long_description=open('README').read(), ) diff --git a/src/setup_gnip_creds.py b/src/setup_gnip_creds.py index 2044b09..288fb7c 100755 --- a/src/setup_gnip_creds.py +++ b/src/setup_gnip_creds.py @@ -3,47 +3,47 @@ import os import datetime import getpass -import ConfigParser +import configparser import shutil -config = ConfigParser.ConfigParser() +config = configparser.ConfigParser() config.read('./.gnip') try: - config.add_section('creds') -except ConfigParser.DuplicateSectionError: - overwrite = raw_input("File ./.gnip already exists. Overwrite? (Y/n)") + config['creds'] = {} +except configparser.DuplicateSectionError: + overwrite = input("File ./.gnip already exists. Overwrite? (Y/n)") if overwrite.lower() not in ['y','yes','','yup','ye','yep','affirmative','yessir','yepums','si','oui','ok']: - print "Exiting." + print("Exiting.") sys.exit() else: shutil.move("./.gnip","./.gnip.old") - config = ConfigParser.ConfigParser() + config = configparser.ConfigParser() config.read('./.gnip') - config.add_section('creds') + config['creds'] = {} -un = raw_input("Username: ") -config.set('creds', 'un', un) +un = input("Username: ") +config['creds']['un'] = un pwd = "" pwd1 = "not set" -while pwd <> pwd1: +while pwd != pwd1: pwd = getpass.getpass("Password: ") pwd1 = getpass.getpass("Password again: ") -config.set('creds', 'pwd', pwd) -config.add_section('endpoint') -an = raw_input("Endpoint URL. Enter your Account Name (eg https://historical.gnip.com/accounts//): ") -config.set('endpoint', 'url', "https://historical.gnip.com/accounts/%s/"%an) -config.add_section('tmp') -config.set('tmp','prevUrl', "") -with open("./.gnip","wb") as f: +config['creds']['pwd'] = pwd +config['endpoint'] = {} +an = input("Endpoint URL. Enter your Account Name (eg https://historical.gnip.com/accounts//): ") +config['endpoint']['url'] = "https://historical.gnip.com/accounts/%s/"%an +config['tmp'] = {} +config['tmp']['prevUrl'] = "" +with open("./.gnip","w") as f: config.write(f) -print "Done creating file ./.gnip" -print "Be sure to run:\nchmod og-w .gnip" -print "Configuration setup complete." -print "\nUpdating path information in get_data_files.bash..." +print ("Done creating file ./.gnip") +print("Be sure to run:\nchmod og-w .gnip") +print("Configuration setup complete.") +print("\nUpdating path information in get_data_files.bash...") currentPath = os.getcwd() state = 0 -with open("./get_data_files.bash","wb") as outf: - with open("./get_data_files.bash.orig","rb") as inf: +with open("./get_data_files.bash","w") as outf: + with open("./get_data_files.bash.orig","r") as inf: for line in inf: newline = line if line.startswith("AUTOPATH="): @@ -54,6 +54,6 @@ newline = "AUTOPATH=%s\n"%currentPath + line state = 2 outf.write(newline) -os.chmod("./get_data_files.bash", 0755 ) -print "Done." +os.chmod("./get_data_files.bash", 0o755 ) +print("Done.")