Joeyh021 · LoudShadow · Jul 29, 2021 · Jul 29, 2021 · Jul 29, 2021
diff --git a/PaperScraper.py b/PaperScraper.py
@@ -4,31 +4,43 @@
 
 
-# edit these two parameters
+# edit this parameter; departments are selected in departments.txt
-URL = "https://warwick.ac.uk/services/exampapers/?q=&department=CS&year="
-COOKIE_TOKEN = "your token here"
-
+COOKIE_TOKEN = ""  # paste the value of your WarwickSSO cookie here
 
 s = rq.Session()
 s.cookies["WarwickSSO"] = COOKIE_TOKEN
 
 
-def downloadpdf(url: str):
+def downloadpdf(url: str, dep: str):  # dep: folder name under papers/ for this department
 
     file = s.get(url)
     filename: str = url.split('/')[-1]
     year: str = url.split('/')[-2]
 
-    makedirs(f"papers/{year}", exist_ok=True)
+    makedirs(f"papers/{dep}/{year}", exist_ok=True)  # group by department, then by year
 
-    with open(f"papers/{year}/{filename}", "wb+") as f:
+    with open(f"papers/{dep}/{year}/{filename}", "wb+") as f:  # binary write of the downloaded body
         f.write(file.content)
 
 
-page = s.get(URL).text
-soup = BeautifulSoup(page, 'html.parser')
-
-for tag in soup.find_all("a"):
-    url: str = tag.get("href")
-    if url.endswith(".pdf"):
-        print(url)
-        downloadpdf(url)
+def getDepartment(dep: list[str]):
+    """Download every linked PDF for department code dep[0] into the papers/dep[1] folder."""
+    URL=f"https://warwick.ac.uk/services/exampapers/?q=&department={dep[0]}&year="
+    page = s.get(URL).text
+    soup = BeautifulSoup(page, 'html.parser')
+    for tag in soup.find_all("a"):
+        url = tag.get("href")  # None when the anchor has no href attribute
+        if url and url.endswith(".pdf"):
+            print(url)
+            downloadpdf(url,dep[1])
+
+def chosenDepartments():
+    """Return a [code, name] pair for every enabled line of departments.txt."""
+    with open("departments.txt") as dep:
+        lines = [line.strip() for line in dep]
+    # Blank lines and '#' comments are not selections; a blank line used to
+    # produce an empty department code that was then requested like a real one.
+    enabled = [line for line in lines if line and not line.startswith("#")]
+    return [[code, name] for code, _, name in (line.partition(" ") for line in enabled)]
+
+for code, name in chosenDepartments():  # one [code, name] pair per enabled department
+    getDepartment([code, name])
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ You'll need to provide an SSO token from a cookie so the script can access the p
 
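-Next open devtools, and navigate to the storage tab. There you should be able to find your cookies for this site. You want with one with the key `WarwickSSO`. Copy the value of it, and assign it to the constant `COOKIE_TOKEN` in the script.
+Next, open devtools and navigate to the storage tab. There you should be able to find your cookies for this site. You want the one with the key `WarwickSSO`. Copy its value and assign it to the constant `COOKIE_TOKEN` in the script.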
 
-By default the url points to all DCS past papers for all years. You can change this by navigating to the page you want the papers from, and changing the `URL` parameter at the top of the script.
+By default no departments are selected. You can change this by un-commenting the department(s) of your choice in `departments.txt`.
 
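-The papers are organised by year. If you want to make a PR to organise them by module or some other way feel free.
+The papers are organised by department, then by year. If you want to make a PR to organise them by module or some other way, feel free.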
 

diff --git a/departments.txt b/departments.txt
@@ -0,0 +1,23 @@
+#un-comment any departments to download
+#ET Applied Linguistics
+#CE Centre for Lifelong Learning
+#CY Centre for Scientific Computing
+#CH Chemistry
+#CX Classics & Ancient History
+#AM Comparative American Studies
+#CS Computer Science
+#EC Economics
+#EQ Education Studies
+#EN English and Comparative Literary Studies
+#PX Physics
+#PO Politics & International Studies
+#PS Psychology
+#ES School of Engineering
+#AS School of Health & Social Studies
+#LA School of Law
+#LN School of Modern Languages and Cultures
+#SO Sociology
+#ST Statistics
+#TH Theatre and Performance Studies
+#IB Warwick Business School
+#MA Warwick Mathematics Institute