diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json
index 47c39438de..d3f44240cc 100644
--- a/sherlock_project/resources/data.json
+++ b/sherlock_project/resources/data.json
@@ -159,6 +159,7 @@
"errorType": "status_code",
"url": "https://www.artstation.com/{}",
"urlMain": "https://www.artstation.com/",
+ "urlProbe": "https://www.artstation.com/users/{}.json",
"username_claimed": "Blue"
},
"Asciinema": {
@@ -404,6 +405,13 @@
"urlMain": "https://carbonmade.com/",
"username_claimed": "jenny"
},
+ "Carrd": {
+ "errorType": "status_code",
+ "regexCheck": "^[a-zA-Z0-9_-]{3,50}$",
+ "url": "https://{}.carrd.co/",
+ "urlMain": "https://carrd.co/",
+ "username_claimed": "blue"
+ },
"Career.habr": {
"errorMsg": "<h1>\u041e\u0448\u0438\u0431\u043a\u0430 404</h1>",
"errorType": "message",
@@ -602,10 +610,9 @@
"username_claimed": "blue"
},
"Cracked Forum": {
- "errorMsg": "The member you specified is either invalid or doesn't exist",
- "errorType": "message",
- "url": "https://cracked.sh/{}",
- "urlMain": "https://cracked.sh/",
+ "errorType": "status_code",
+ "url": "https://cracked.ax/{}",
+ "urlMain": "https://cracked.ax/",
"username_claimed": "Blue"
},
"Credly": {
@@ -952,7 +959,8 @@
"username_claimed": "blue"
},
"GeeksforGeeks": {
- "errorType": "status_code",
+ "errorMsg": "false | GeeksforGeeks Profile",
+ "errorType": "message",
"url": "https://auth.geeksforgeeks.org/user/{}",
"urlMain": "https://www.geeksforgeeks.org/",
"username_claimed": "adam"
@@ -1526,7 +1534,8 @@
"username_claimed": "lottiefiles"
},
"LushStories": {
- "errorType": "status_code",
+ "errorType": "response_url",
+ "errorUrl": "https://www.lushstories.com/login",
"isNSFW": true,
"url": "https://www.lushstories.com/profile/{}",
"urlMain": "https://www.lushstories.com/",
@@ -2279,6 +2288,13 @@
"urlMain": "https://sourceforge.net/",
"username_claimed": "blue"
},
+ "SpaceHey": {
+ "errorType": "message",
+ "errorMsg": "Not Found (Error 404) | SpaceHey",
+ "url": "https://spacehey.com/{}",
+ "urlMain": "https://spacehey.com/",
+ "username_claimed": "blue"
+ },
"SoylentNews": {
"errorMsg": "The user you requested does not exist, no matter how much you wish this might be the case.",
"errorType": "message",
@@ -2376,6 +2392,13 @@
"urlMain": "https://www.strava.com/",
"username_claimed": "blue"
},
+ "Substack": {
+ "errorType": "status_code",
+ "regexCheck": "^[a-zA-Z0-9][a-zA-Z0-9_-]{1,60}$",
+ "url": "https://{}.substack.com/",
+ "urlMain": "https://substack.com/",
+ "username_claimed": "green"
+ },
"SublimeForum": {
"errorType": "status_code",
"url": "https://forum.sublimetext.com/u/{}",
@@ -2827,8 +2850,10 @@
},
"akniga": {
"errorType": "status_code",
+ "errorCode": 404,
+ "request_method": "GET",
"url": "https://akniga.org/profile/{}",
- "urlMain": "https://akniga.org/profile/blue/",
+ "urlMain": "https://akniga.org/",
"username_claimed": "blue"
},
"authorSTREAM": {
diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py
index a284f47ad5..e037d39458 100644
--- a/sherlock_project/sherlock.py
+++ b/sherlock_project/sherlock.py
@@ -136,6 +136,9 @@ def get_response(request_future, error_type, social_network):
except requests.exceptions.RequestException as err:
error_context = "Unknown Error"
exception_text = str(err)
+ except UnicodeError as err:
+ error_context = "Encoding Error"
+ exception_text = str(err)
return response, error_context, exception_text
diff --git a/tests/test_unicode.py b/tests/test_unicode.py
new file mode 100644
index 0000000000..fa6e3a3038
--- /dev/null
+++ b/tests/test_unicode.py
@@ -0,0 +1,47 @@
+"""Tests for handling usernames with special/unicode characters."""
+
+from concurrent.futures import Future
+
+from sherlock_project.sherlock import get_response
+
+
+def _make_future_with_exception(exc):
+ """Return a Future already completed with *exc*, so result() raises it."""
+ future = Future()
+ future.set_exception(exc)
+ return future
+
+
+def test_get_response_handles_unicode_decode_error():
+ """Regression test for issue #2730.
+
+ Usernames with special characters (e.g. 'Émile') can trigger a
+ UnicodeDecodeError inside the requests library during redirect
+ handling. get_response() must return it as an "Encoding Error".
+ """
+ future = _make_future_with_exception(
+ UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte")
+ )
+ response, error_context, exception_text = get_response(
+ request_future=future,
+ error_type=["status_code"],
+ social_network="TestSite",
+ )
+ assert response is None
+ assert error_context == "Encoding Error"
+ assert "utf-8" in exception_text
+
+
+def test_get_response_handles_unicode_encode_error():
+ """UnicodeEncodeError (a UnicodeError subclass) must also yield "Encoding Error"."""
+ future = _make_future_with_exception(
+ UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)")
+ )
+ response, error_context, exception_text = get_response(
+ request_future=future,
+ error_type=["status_code"],
+ social_network="TestSite",
+ )
+ assert response is None
+ assert error_context == "Encoding Error"
+ assert "ascii" in exception_text
diff --git a/tests/test_ux.py b/tests/test_ux.py
index 3c62463b50..1feaf88a19 100644
--- a/tests/test_ux.py
+++ b/tests/test_ux.py
@@ -4,7 +4,7 @@
from sherlock_interactives import InteractivesSubprocessError
def test_remove_nsfw(sites_obj):
- nsfw_target: str = 'Pornhub'
+ nsfw_target: str = 'Xvideos'
assert nsfw_target in {site.name: site.information for site in sites_obj}
sites_obj.remove_nsfw_sites()
assert nsfw_target not in {site.name: site.information for site in sites_obj}
@@ -12,8 +12,8 @@ def test_remove_nsfw(sites_obj):
# Parametrized sites should *not* include Motherless, which is acting as the control
@pytest.mark.parametrize('nsfwsites', [
- ['Pornhub'],
- ['Pornhub', 'Xvideos'],
+ ['Xvideos'],
+ ['Xvideos', 'Erome'],
])
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
for site in nsfwsites: