# Kongregate definition.
import fpclib
import bs4, re, urllib, urllib.parse, uuid, json

regex = 'kongregate.com'
ver = 7 # Made to work with the 2026 site redesign

IF_URL = re.compile(r'[\'"]iframe_url[\'"]:[\'"](.*?)[\'"]')
SWF_URL = re.compile(r'swf_location\s?=\s?[\'\"]\/?\/?(.+?)(\?.+?)?[\'\"]')
# NOTE(review): one definition between SWF_URL and EMBED_UNITY is not visible
# in the reviewed excerpt and is intentionally not reconstructed here.
EMBED_UNITY = re.compile(r'kongregateUnityDiv\\\",\s\\\"\/\/(.*?)\\",\s(\d*?),\s(\d*?),')
UUID = re.compile(r'[0-9a-fA-F]{32}')
SIZE = re.compile(r'[\'"]game_width[\'"]:(\d+),[\'"]game_height[\'"]:(\d+)')
# Non-greedy capture: a greedy `.*` would run to the LAST quote on the line,
# which swallows the rest of a minified one-line config.
GAME_TYPE = re.compile(r'game_type":[\'"](.*?)[\'"]')

# NOTE(review): the UNITY_EMBED template (and the HTML_EMBED template used by
# get_files) are defined in the full file; their bodies are outside the
# reviewed excerpt and are not reproduced here.


def _video_game_node(meta):
    """Return the JSON-LD node whose @type is (or includes) "VideoGame".

    `meta` is the decoded JSON-LD document: either an object with an
    "@graph" list, a single node, or a list of nodes. If several nodes
    qualify, the last one wins (same as the original loop).

    Raises ValueError when no such node exists.
    """
    if isinstance(meta, dict):
        nodes = meta.get("@graph", [meta])
    else:
        nodes = meta

    found = None
    for node in nodes:
        if not isinstance(node, dict):
            continue
        types = node.get("@type", [])
        # @type may be a single string or a list of strings; normalise so the
        # membership test is exact rather than a substring match.
        if isinstance(types, str):
            types = [types]
        if "VideoGame" in types:
            found = node

    if found is None:
        raise ValueError("No VideoGame entry found in the page's JSON-LD metadata")
    return found


def _author_names(author):
    """Return the author name, or a list of names when `author` is a list."""
    if isinstance(author, list):
        return [entry["name"] for entry in author]
    return author["name"]


class Kongregate(fpclib.Curation):
    # NOTE(review): any class members defined before parse() are outside the
    # reviewed excerpt.

    def parse(self, soup):
        """Populate this curation from a Kongregate game page.

        `soup` is the parsed game page. Metadata (title, logo, developer,
        release date, description) is read from the page's JSON-LD block; the
        launch data is read from the game embed iframe, which requires one or
        two further page fetches through fpclib.get_soup.

        Raises ValueError when the metadata, the embed iframe, its
        configuration or the game file cannot be found, or when the game type
        is not one of flash / unity / html / iframe.
        """
        k_uuid = str(uuid.uuid4())

        meta_tag = soup.find("script", type="application/ld+json")
        if meta_tag is None:
            raise ValueError("Page has no JSON-LD metadata script")
        game_json = _video_game_node(json.loads(meta_tag.text))

        self.title = game_json['name']

        # Get Logo (best effort: skipped when absent or not a plain URL string)
        try: self.logo = game_json['image'].split("?")[0]
        except (KeyError, AttributeError): pass

        # Get Developer and set Publisher
        self.dev = _author_names(game_json['author'])
        self.pub = "Kongregate"

        # Get Release Date (date part of the ISO timestamp)
        self.date = game_json['datePublished'].split("T")[0]

        # Get description; append the second "mb-8 text-left" panel when the
        # page has one (the original treats it as the how-to-play text).
        desc = game_json['description']
        panels = soup.find_all("div", {"class": "mb-8 text-left"})
        if len(panels) > 1:
            desc += "\n" + panels[1].text
        self.desc = desc.strip()

        # Get iframe embed. urljoin copes with relative, root-relative and
        # absolute src values instead of blindly concatenating.
        frame = soup.find("iframe", {"class": "game-embed-iframe"})
        if frame is None or not frame.get("src"):
            raise ValueError("Could not find the game embed iframe")
        embed_data = fpclib.get_soup(urllib.parse.urljoin("https://www.kongregate.com/", frame["src"]))

        # Then retrieve the game embed config within the iframe embed.
        # The game type is referenced directly in that config, so it tells us
        # how to deal with the game.
        if_url = None
        game_type = None
        for script in embed_data.find_all("script"):
            text = script.text
            if "iframeUrl" not in text:
                continue
            url_match = IF_URL.search(text)
            type_match = GAME_TYPE.search(text)
            if url_match is None or type_match is None:
                continue
            if_url = url_match[1]
            game_type = type_match[1]
            # Regex match object (or None when the config has no dimensions);
            # get_files reads groups 1 and 2 as width and height.
            self.size = SIZE.search(text)

        if if_url is None:
            raise ValueError("Could not find the iframe configuration in the embed page")

        if if_url.startswith("//"):
            if_url = "http:" + if_url

        if if_url.endswith("/frame/"):
            if_url = if_url + k_uuid + "/?kongregate_host=www.kongregate.com"

        # Flash and Unity games need the actual game file, which is referenced
        # from a script inside the frame page.
        if_file = None
        if game_type in ("flash", "unity"):
            for script in fpclib.get_soup(if_url).find_all("script"):
                text = script.text
                if "swf_location" in text:
                    found = SWF_URL.search(text)
                elif "kongregateUnityDiv" in text:
                    found = EMBED_UNITY.search(text)
                else:
                    continue
                if found:
                    if_file = found[1]
            if if_file is None:
                raise ValueError("Could not find the game file in the " + game_type + " embed page")

        if game_type in ("html", "iframe"):
            self.platform = "HTML5"
            self.app = fpclib.FPNAVIGATOR
            self.cmd = fpclib.normalize(if_url).rsplit("/",1)[0] + "/customKongEmbed.html"
            self.if_url = fpclib.normalize(if_url, keep_vars=True)
            self.if_file = fpclib.normalize(if_url)

        elif game_type == "flash":
            self.platform = "Flash"
            self.app = fpclib.FLASH
            self.cmd = fpclib.normalize(if_file)

        elif game_type == "unity":
            self.platform = "Unity"
            self.app = fpclib.UNITY
            self.cmd = fpclib.normalize(if_url)
            self.if_url = fpclib.normalize(if_url, keep_vars=True)
            self.if_file = fpclib.normalize(if_file)

        else:
            raise ValueError("Unhandled game type. Only Flash, HTML5 and Unity are supported")

    def get_files(self):
        """Download the game's files.

        HTML5 and Unity games: download the frame/game file, rewrite its
        https references to http, and write a local embed page at the launch
        command's path. Everything else (Flash) is delegated to the base
        class.
        """
        if self.platform not in ("HTML5", "Unity"):
            # Flash games are downloaded normally
            super().get_files()
            return

        # Download the file that ought to be embedded
        fpclib.download_all((self.if_file,))
        # Replace all references to https with http.
        # [7:] drops the scheme; assumes fpclib.normalize produced an
        # "http://" URL - TODO confirm.
        fpclib.replace(self.if_file[7:], "https:", "http:")

        # Create the page that embeds the game
        f = self.cmd[7:]
        if f.endswith("/"): f += "index.html"

        template = HTML_EMBED if self.platform == "HTML5" else UNITY_EMBED
        fpclib.write(f, template % (self.title, self.size[1], self.size[2], self.if_file))