Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 96 additions & 65 deletions crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,31 @@
'ComEng2': [['term', 'HS']],
'SEProj': [['term', 'FS'],['isMandatory', True]],
'PF': [['isDeactivated', True]],
'SE1': [['successorModuleId', 'SEP2']],
'SE2': [['successorModuleId', 'SEP2']],
'SEP1': [['predecessorModuleId', 'SE1'],['isMandatory', True]],
'SEP2': [['predecessorModuleId', 'SE2'],['isMandatory', True]],
'BuPro': [['successorModuleId', 'WI2']],
'WI2': [['predecessorModuleId', 'BuPro']],
'RheKI': [['successorModuleId', 'RheKoI']],
'RheKoI': [['predecessorModuleId', 'RheKI']],
'RKI': [['successorModuleId', 'RheKI']],
'RheKI': [['predecessorModuleId', 'RKI']],
'SDW': [['successorModuleId', 'IBN']],
'IBN': [['predecessorModuleId', 'SDW']],
'FunProg': [['successorModuleId', 'FP']],
'FP': [['predecessorModuleId', 'FunProg']],
'IBN': [['predecessorModuleId', 'SDW']],
'WIoT': [['successorModuleId', 'WsoT']],
'WsoT': [['predecessorModuleId', 'WIoT']],
'SecSW': [['successorModuleId', 'SecSoW']],
'SecSoW': [['predecessorModuleId', 'SecSW']],
'Inno2': [['successorModuleId', 'Inno_2']],
'Inno_2': [['predecessorModuleId', 'Inno2']],
'SE1': [['successormoduleShortKey', 'SEP2']],
'SE2': [['successormoduleShortKey', 'SEP2']],
'SEP1': [['predecessormoduleShortKey', 'SE1'],['isMandatory', True]],
'SEP2': [['predecessormoduleShortKey', 'SE2'],['isMandatory', True]],
'BuPro': [['successormoduleShortKey', 'WI2']],
'WI2': [['predecessormoduleShortKey', 'BuPro']],
'RheKI': [['successormoduleShortKey', 'RheKoI']],
'RheKoI': [['predecessormoduleShortKey', 'RheKI']],
'RKI': [['successormoduleShortKey', 'RheKI']],
'RheKI': [['predecessormoduleShortKey', 'RKI']],
'SDW': [['successormoduleShortKey', 'IBN']],
'IBN': [['predecessormoduleShortKey', 'SDW']],
'FunProg': [['successormoduleShortKey', 'FP']],
'FP': [['predecessormoduleShortKey', 'FunProg']],
'IBN': [['predecessormoduleShortKey', 'SDW']],
'WIoT': [['successormoduleShortKey', 'WsoT']],
'WsoT': [['predecessormoduleShortKey', 'WIoT']],
'SecSW': [['successormoduleShortKey', 'SecSoW']],
'SecSoW': [['predecessormoduleShortKey', 'SecSW']],
'Inno2': [['successormoduleShortKey', 'Inno_2']],
'Inno_2': [['predecessormoduleShortKey', 'Inno2']],
'BAI21': [['term', 'both'],['isMandatory', True]],
'SAI21': [['term', 'both'],['isMandatory', True]],
'IKBH': [['successorModuleId', 'IKBD']],
'IKBD': [['predecessorModuleId', 'IKBH']]
'IKBH': [['successormoduleShortKey', 'IKBD']],
'IKBD': [['predecessormoduleShortKey', 'IKBH']]
}

def write_json(data, filename):
Expand All @@ -50,26 +50,31 @@ def set_default(obj):
json.dump(data, output, indent=2, ensure_ascii=False, default=set_default)
output.write('\n')

def getShortNameForModule(kuerzel):
    """Return the short key derived from a module's 'kuerzel'.

    One leading ``'M_'`` prefix is removed and every ``'_p'`` is replaced by
    ``'p'`` (for example ``'M_SEProj_p'`` becomes ``'SEProjp'``).
    """
    return kuerzel.removeprefix('M_').replace('_p', 'p')


# Backward-compatible alias: other lines of this file still call the helper
# by its previous name.
getIdForModule = getShortNameForModule

def getIdForCategory(kuerzel):
    """Return the category id derived from a category's 'kuerzel'.

    The prefixes ``'I-'``, ``'I_'`` and ``'Kat_'`` are stripped in that order
    (each at most once, and only when it is at the start of what remains);
    afterwards any occurrence of ``'IKTS-help'`` is replaced by ``'GWRIKTS'``.
    """
    stripped = kuerzel
    for prefix in ('I-', 'I_', 'Kat_'):
        stripped = stripped.removeprefix(prefix)
    return stripped.replace('IKTS-help', 'GWRIKTS')

def create_module(content):
    """Build the initial record for one module.

    ``content`` must provide the keys ``'id'``, ``'kuerzel'``,
    ``'bezeichnung'`` and ``'url'``. The returned dict carries two
    identifiers: ``'module_id'`` (the source ``'id'`` value, unchanged) and
    ``'id'`` (the short key derived from ``'kuerzel'``). All other fields
    start out empty and are filled in by later steps.
    """
    return {
        'module_id': content['id'],
        'id': getShortNameForModule(content['kuerzel']),
        'name': content['bezeichnung'].strip(),
        'url': content['url'],
        'focuses': [],
        'categories': set(),
        'ects': 0,
        'isDeactivated': False,
        'term': '',
        # Each relation is tracked three ways: a list of {module_id: short key}
        # pairs, the set of module ids, and the set of short keys.
        'recommendedmodules': [],
        'recommendedmoduleIds': set(),
        'recommendedmoduleShortKeys': set(),
        'dependentmodules': [],
        'dependentmoduleIds': set(),
        'dependentmoduleShortKeys': set(),
        'successormoduleShortKey': None,
        'predecessormoduleShortKey': None,
    }

def set_term_for_module(module, moduleContent):
Expand All @@ -87,32 +92,47 @@ def set_term_for_module(module, moduleContent):
else:
module['term'] = endSemester
else:
print(f'{module["id"]} has no term')
print(f'Module {module["id"]} {module["module_id"]} has no term')

def set_successor_and_predecessor_for_module(module, moduleContent, modules):
    """Store the successor/predecessor short keys on ``module``.

    ``moduleContent`` may carry a ``'nachfolger'`` (successor) and/or a
    ``'vorgaenger'`` (predecessor) entry; an entry whose ``'kuerzel'`` equals
    the module's own ``'kuerzel'`` is ignored. When the linked module is
    already present in ``modules`` and has no reverse link yet, the reverse
    link is set to ``module['id']``.

    NOTE(review): ``modules`` is looked up by short key here - confirm the
    caller passes a mapping keyed that way.
    """
    successormoduleShortKey = _linked_short_key(moduleContent, 'nachfolger')
    if successormoduleShortKey is not None:
        module['successormoduleShortKey'] = successormoduleShortKey
        # create_module initialises the link fields to None, so the former
        # `== ""` comparison never matched an unset link; treat any falsy
        # value (None or "") as "not set yet".
        if (successormoduleShortKey in modules
                and not modules[successormoduleShortKey]['predecessormoduleShortKey']):
            modules[successormoduleShortKey]['predecessormoduleShortKey'] = module['id']

    predecessormoduleShortKey = _linked_short_key(moduleContent, 'vorgaenger')
    if predecessormoduleShortKey is not None:
        module['predecessormoduleShortKey'] = predecessormoduleShortKey
        if (predecessormoduleShortKey in modules
                and not modules[predecessormoduleShortKey]['successormoduleShortKey']):
            modules[predecessormoduleShortKey]['successormoduleShortKey'] = module['id']


def _linked_short_key(moduleContent, relation):
    """Return the short key of the module linked via ``relation``, or None.

    None is returned when ``relation`` is absent from ``moduleContent`` or
    when the linked entry refers to the module itself.
    """
    if relation not in moduleContent:
        return None
    linkedKuerzel = moduleContent[relation]['kuerzel']
    if linkedKuerzel == moduleContent['kuerzel']:
        return None
    return getShortNameForModule(linkedKuerzel)

def set_recommended_modules_for_module(module, moduleContent):
    """Record the modules that ``moduleContent`` recommends or requires.

    Entries under ``'empfehlungen'`` and ``'voraussetzungen'`` are handled the
    same way. For each entry (which must provide ``'id'`` and ``'kuerzel'``):

    * ``{id: short key}`` is appended to ``module['recommendedmodules']``
      unless an equal pair is already in the list,
    * the id is added to ``module['recommendedmoduleIds']``,
    * the short key is added to ``module['recommendedmoduleShortKeys']``.

    Entries are not filtered against the set of known modules, so modules
    outside "Studiengang Informatik" (such as AN1aE) are recorded too.
    """
    for relation in ('empfehlungen', 'voraussetzungen'):
        for linked in moduleContent.get(relation, ()):
            recommendedmoduleShortKey = getShortNameForModule(linked['kuerzel'])
            recommendedModule = {linked['id']: recommendedmoduleShortKey}

            # The list keeps insertion order, so duplicates are checked by hand.
            if recommendedModule not in module['recommendedmodules']:
                module['recommendedmodules'].append(recommendedModule)
            module['recommendedmoduleIds'].add(linked['id'])
            module['recommendedmoduleShortKeys'].add(recommendedmoduleShortKey)

def set_deactivated_for_module(module, moduleContent):
def set_deactivated_for_module(module, moduleContent):
# assumption: module is deactivated, if 'zustand' is 'deaktiviert' and either (1) 'endJahr' of 'durchfuehrungen' was last year or earlier or (2) no 'durchfuehrungen' is defined
if 'zustand' in moduleContent and moduleContent['zustand'] == 'deaktiviert':
if 'durchfuehrungen' not in moduleContent:
Expand Down Expand Up @@ -161,10 +181,10 @@ def enrich_module_from_json(module, moduleContent):
for cat in module['categories']:
if cat['id'] in categories:
categories[cat['id']]['modules'].append(
{'id': module['id'], 'name': module['name'], 'url': module['url']})
{'module_id': module['module_id'], 'id': module['id'], 'name': module['name'], 'url': module['url']})
elif cat['id'] == 'GWRIKTS':
categories['gwr']['modules'].append(
{'id': module['id'], 'name': module['name'], 'url': module['url']})
{'module_id': module['module_id'], 'id': module['id'], 'name': module['name'], 'url': module['url']})

# 'kredits' contains categories
kredits = jsonContent['kredits']
Expand All @@ -174,9 +194,10 @@ def enrich_module_from_json(module, moduleContent):
if category['kuerzel'] == 'IKTS-help':
continue

catId = getIdForCategory(category['kuerzel'])
categories[catId] = {
'id': catId,
catShortName = getIdForCategory(category['kuerzel'])
categories[catShortName] = {
'module_id': category['id'],
'id': catShortName,
'required_ects': kredit['minKredits'],
'name': category['bezeichnung'],
'modules': [],
Expand All @@ -200,7 +221,7 @@ def enrich_module_from_json(module, moduleContent):
if zuordnung['kuerzel'].endswith('_p'):
module['name'] += ' (Projektarbeit)'

modules[module['id']] = module
modules[module['module_id']] = module

for additional_module_url in additional_module_urls:
moduleContent = json.loads(requests.get(f'{BASE_URL}{additional_module_url}').content)
Expand All @@ -209,7 +230,7 @@ def enrich_module_from_json(module, moduleContent):
categoriesForStudienordnung = [z['kategorien'] for z in moduleContent['zuordnungen'] if z['url'] == url][0]
module['categories'] = [{'id': getIdForCategory(c['kuerzel']), 'name': c['bezeichnung'], 'ects': c['kreditpunkte']} for c in categoriesForStudienordnung]
module['ects'] = moduleContent['kreditpunkte']
modules[module['id']] = module
modules[module['module_id']] = module

for module in modules.values():
try:
Expand All @@ -221,49 +242,59 @@ def enrich_module_from_json(module, moduleContent):


for module in modules.values():
for recommendedModuleId in module['recommendedModuleIds']:
if recommendedModuleId in modules:
modules[recommendedModuleId]['dependentModuleIds'].add(module['id'])
if modules[recommendedModuleId]['isDeactivated'] == False:
for recommendedmoduleId in module['recommendedmoduleIds']:
if recommendedmoduleId in modules:

dependentModule = {module['module_id']:module['id']}

if dependentModule not in modules[recommendedmoduleId]['dependentmodules']:
modules[recommendedmoduleId]['dependentmodules'].append(dependentModule)

modules[recommendedmoduleId]['dependentmoduleShortKeys'].add(module['id'])
modules[recommendedmoduleId]['dependentmoduleIds'].add(module['module_id'])
if modules[recommendedmoduleId]['isDeactivated'] == False:
continue

# 'spezialisierungen' contains focuses
spezialisierungen = jsonContent['spezialisierungen']
for spez in spezialisierungen:
focus = {
'module_id': spez['id'],
'id': spez['kuerzel'],
'url': spez['url'],
'name': spez['bezeichnung'],
'modules': []
}
focusContent = json.loads(requests.get(f'{BASE_URL}{spez["url"]}').content)
for zuordnung in focusContent['zuordnungen']:
moduleId = getIdForModule(zuordnung['kuerzel'])
moduleId = zuordnung['id']
moduleShortKey = getShortNameForModule(zuordnung['kuerzel'])

if moduleId == 'WIoT':
moduleId = 'WsoT'
if moduleShortKey == 'WIoT':
moduleShortKey = 'WsoT'

if moduleId in modules:
focus['modules'].append({
'id': moduleId,
'module_id': moduleId,
'id': moduleShortKey,
'name': modules[moduleId]['name'],
'url': modules[moduleId]['url']})

modules[moduleId]['focuses'].append({'id': focus['id'], 'name': focus['name'], 'url': focus['url']})
modules[moduleId]['focuses'].append({'module_id': focus['module_id'], 'id': focus['id'], 'name': focus['name'], 'url': focus['url']})

focus['modules'].sort(key = lambda x: x['id'])
focus['modules'] = list({m['id']: m for m in focus['modules']}.values())
focus['modules'].sort(key = lambda x: x['module_id'])
focus['modules'] = list({m['module_id']: m for m in focus['modules']}.values())
focuses.append(focus)

# id should be unique for each module
idsSet = set([m['id'] for m in modules.values()])
idsSet = set([m['module_id'] for m in modules.values()])
if len(idsSet) != len(modules):
sys.exit(1)

categories = list(categories.values())

for category in categories:
category['modules'].sort(key = lambda x: x['id'])
category['modules'].sort(key = lambda x: x['module_id'])

categories.sort(key = lambda x: x['id'])
focuses.sort(key = lambda x: x['id'])
Expand Down Expand Up @@ -291,5 +322,5 @@ def enrich_module_from_json(module, moduleContent):
os.mkdir(output_directory)

modules = list(modules.values())
modules.sort(key = lambda x: x['id'])
modules.sort(key = lambda x: x['module_id'])
write_json(modules, f'{output_directory}/modules.json')
Loading