forked from openwpm/OpenWPM
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_host.py
More file actions
33 lines (27 loc) · 1.15 KB
/
get_host.py
File metadata and controls
33 lines (27 loc) · 1.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import json
from datetime import datetime
import sqlite3
import csv
from pandas import read_sql_query
def get_host_from_url(url):
    """Return the host portion of *url*.

    The URL is first reduced by ``strip_scheme_www_and_query``; everything
    before the first "/" of that reduced string is returned. A falsy *url*
    yields an empty string.
    """
    remainder = strip_scheme_www_and_query(url)
    host, _slash, _path = remainder.partition("/")
    return host
def strip_scheme_www_and_query(url):
    """Remove the scheme, a leading ``www.`` and the query section of a URL.

    Args:
        url: The URL string to reduce. May be empty or ``None``.

    Returns:
        The URL without its scheme (``http://``, ``https://`` or a
        protocol-relative ``//``), without a leading ``www.`` prefix and
        without anything from the first ``?`` onwards. Returns ``""`` when
        *url* is falsy.
    """
    if not url:
        return ""

    # Drop the query first so that a "//" inside it (e.g. "?next=http://x")
    # cannot be mistaken for the scheme separator.
    stripped = url.split("?", 1)[0]

    # Only the first "//" can be the scheme separator, and only when what
    # precedes it is empty (protocol-relative URL) or ends with ":" and
    # contains no "/". A later "//" belongs to the path and must be kept.
    head, separator, tail = stripped.partition("//")
    if separator and (not head or (head.endswith(":") and "/" not in head)):
        stripped = tail

    # str.lstrip("www.") would strip any leading run of the characters "w"
    # and ".", mangling hosts such as "web.dev"; remove the exact prefix.
    if stripped.startswith("www."):
        stripped = stripped[len("www."):]

    return stripped
if __name__ == '__main__':
    # Input: raw HTTP response rows; output: one CSV row per extracted host.
    source_csv = '/Users/payalkulkarni/Documents/GitHub/OpenWPM-But-Better/HTTP Response Analysis/Stateless/Ghostery/responses_ghostery_stateless.csv'
    target_csv = 'ghostery_stateless_repsonses_hosts.csv'

    # Each raw line of the input is treated as a URL. Any line containing the
    # text "visit_id" is skipped (this is how the header row is excluded).
    with open(source_csv, 'r') as response_file:
        http_response_hosts = [
            get_host_from_url(line)
            for line in response_file
            if "visit_id" not in line
        ]

    with open(target_csv, 'w', newline='') as host_file:
        writer = csv.writer(host_file)
        for host in http_response_hosts:
            # Trim stray commas/spaces from the ends, then split whatever
            # comma-separated fields remain into individual, stripped cells.
            fields = host.strip(', ').split(',')
            writer.writerow([field.strip() for field in fields])