diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 47c39438de..d3f44240cc 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -159,6 +159,7 @@ "errorType": "status_code", "url": "https://www.artstation.com/{}", "urlMain": "https://www.artstation.com/", + "urlProbe": "https://www.artstation.com/users/{}.json", "username_claimed": "Blue" }, "Asciinema": { @@ -404,6 +405,13 @@ "urlMain": "https://carbonmade.com/", "username_claimed": "jenny" }, + "Carrd": { + "errorType": "status_code", + "regexCheck": "^[a-zA-Z0-9_-]{3,50}$", + "url": "https://{}.carrd.co/", + "urlMain": "https://carrd.co/", + "username_claimed": "blue" + }, "Career.habr": { "errorMsg": "

\u041e\u0448\u0438\u0431\u043a\u0430 404

", "errorType": "message", @@ -602,10 +610,9 @@ "username_claimed": "blue" }, "Cracked Forum": { - "errorMsg": "The member you specified is either invalid or doesn't exist", - "errorType": "message", - "url": "https://cracked.sh/{}", - "urlMain": "https://cracked.sh/", + "errorType": "status_code", + "url": "https://cracked.ax/{}", + "urlMain": "https://cracked.ax/", "username_claimed": "Blue" }, "Credly": { @@ -952,7 +959,8 @@ "username_claimed": "blue" }, "GeeksforGeeks": { - "errorType": "status_code", + "errorMsg": "false | GeeksforGeeks Profile", + "errorType": "message", "url": "https://auth.geeksforgeeks.org/user/{}", "urlMain": "https://www.geeksforgeeks.org/", "username_claimed": "adam" @@ -1526,7 +1534,8 @@ "username_claimed": "lottiefiles" }, "LushStories": { - "errorType": "status_code", + "errorType": "response_url", + "errorUrl": "https://www.lushstories.com/login", "isNSFW": true, "url": "https://www.lushstories.com/profile/{}", "urlMain": "https://www.lushstories.com/", @@ -2279,6 +2288,13 @@ "urlMain": "https://sourceforge.net/", "username_claimed": "blue" }, + "SpaceHey": { + "errorType": "message", + "errorMsg": "Not Found (Error 404) | SpaceHey", + "url": "https://spacehey.com/{}", + "urlMain": "https://spacehey.com/", + "username_claimed": "blue" + }, "SoylentNews": { "errorMsg": "The user you requested does not exist, no matter how much you wish this might be the case.", "errorType": "message", @@ -2376,6 +2392,13 @@ "urlMain": "https://www.strava.com/", "username_claimed": "blue" }, + "Substack": { + "errorType": "status_code", + "regexCheck": "^[a-zA-Z0-9][a-zA-Z0-9_-]{1,60}$", + "url": "https://{}.substack.com/", + "urlMain": "https://substack.com/", + "username_claimed": "green" + }, "SublimeForum": { "errorType": "status_code", "url": "https://forum.sublimetext.com/u/{}", @@ -2827,8 +2850,10 @@ }, "akniga": { "errorType": "status_code", + "errorCode": 404, + "request_method": "GET", "url": "https://akniga.org/profile/{}", - "urlMain": "https://akniga.org/profile/blue/", + "urlMain": "https://akniga.org/", "username_claimed": "blue" }, "authorSTREAM": { diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index a284f47ad5..e037d39458 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -136,6 +136,9 @@ def get_response(request_future, error_type, social_network): except requests.exceptions.RequestException as err: error_context = "Unknown Error" exception_text = str(err) + except UnicodeError as err: + error_context = "Encoding Error" + exception_text = str(err) return response, error_context, exception_text diff --git a/tests/test_unicode.py b/tests/test_unicode.py new file mode 100644 index 0000000000..fa6e3a3038 --- /dev/null +++ b/tests/test_unicode.py @@ -0,0 +1,47 @@ +"""Tests for handling usernames with special/unicode characters.""" + +from concurrent.futures import Future + +from sherlock_project.sherlock import get_response + + +def _make_future_with_exception(exc): + """Create a Future that raises the given exception.""" + future = Future() + future.set_exception(exc) + return future + + +def test_get_response_handles_unicode_decode_error(): + """Regression test for issue #2730. + + Usernames with special characters (e.g. 'Émile') can trigger a + UnicodeDecodeError inside the requests library during redirect + handling. This must not crash the program. + """ + future = _make_future_with_exception( + UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte") + ) + response, error_context, exception_text = get_response( + request_future=future, + error_type=["status_code"], + social_network="TestSite", + ) + assert response is None + assert error_context == "Encoding Error" + assert "utf-8" in exception_text + + +def test_get_response_handles_unicode_encode_error(): + """UnicodeEncodeError should also be caught (subclass of UnicodeError).""" + future = _make_future_with_exception( + UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)") + ) + response, error_context, exception_text = get_response( + request_future=future, + error_type=["status_code"], + social_network="TestSite", + ) + assert response is None + assert error_context == "Encoding Error" + assert "ascii" in exception_text diff --git a/tests/test_ux.py b/tests/test_ux.py index 3c62463b50..1feaf88a19 100644 --- a/tests/test_ux.py +++ b/tests/test_ux.py @@ -4,7 +4,7 @@ from sherlock_interactives import InteractivesSubprocessError def test_remove_nsfw(sites_obj): - nsfw_target: str = 'Pornhub' + nsfw_target: str = 'Xvideos' assert nsfw_target in {site.name: site.information for site in sites_obj} sites_obj.remove_nsfw_sites() assert nsfw_target not in {site.name: site.information for site in sites_obj} @@ -12,8 +12,8 @@ def test_remove_nsfw(sites_obj): # Parametrized sites should *not* include Motherless, which is acting as the control @pytest.mark.parametrize('nsfwsites', [ - ['Pornhub'], - ['Pornhub', 'Xvideos'], + ['Xvideos'], + ['Xvideos', 'Erome'], ]) def test_nsfw_explicit_selection(sites_obj, nsfwsites): for site in nsfwsites: