sparkmicro · IJIJI · Jun 21, 2026 · Jun 21, 2026 · Jun 21, 2026 · Jun 21, 2026
diff --git a/.gitignore b/.gitignore
@@ -10,3 +10,9 @@ kintree/tests/*
 .coverage
 htmlcov/
 .vscode/launch.json
+
+# vscode
+.vscode
+
+# github
+.github/copilot-instructions.md
diff --git a/kintree/common/tools.py b/kintree/common/tools.py
@@ -79,6 +79,13 @@ def get_image_with_retries(url, headers, retries=3, wait=5, silent=False):
     return None
 
 
+def get_with_cloudscraper(url, headers, timeout):
+    """ Single-shot GET through a cloudscraper session (closed on return), to get past Cloudflare protection """
+    import cloudscraper  # local import: only needed when this fallback is actually used
+    with cloudscraper.create_scraper() as scraper:  # context manager releases the session's connections
+        return scraper.get(url, headers=headers, timeout=timeout)
+
+
 def download(url, filetype='API data', fileoutput='', timeout=3, enable_headers=False, requests_lib=False, try_cloudscraper=False, silent=False):
     ''' Standard method to download URL content, with option to save to local file (eg. images) '''
 
@@ -132,19 +139,17 @@ def download(url, filetype='API data', fileoutput='', timeout=3, enable_headers=
                     return None
             return file
         else:
-            # some suppliers work with requests.get(), others need urllib.request.urlopen()
-            try:
-                response = requests.get(url)
-                data_json = response.json()
-                return data_json
-            except requests.exceptions.JSONDecodeError:
+            # Try plain requests first; if the endpoint is Cloudflare-protected the body
+            # won't be JSON (or the connection is refused) -> fall back to cloudscraper.
+            # Some suppliers only work with urllib.request.urlopen(), kept as last resort.
+            for fetch in (requests.get, get_with_cloudscraper):
                 try:
-                    url_data = urllib.request.urlopen(url)
-                    data = url_data.read()
-                    data_json = json.loads(data.decode('utf-8'))
-                    return data_json
-                finally:
-                    pass
+                    return fetch(url, headers=headers, timeout=timeout).json()
+                except (requests.exceptions.RequestException, ValueError):
+                    continue
+            url_data = urllib.request.urlopen(url)
+            data = url_data.read()
+            return json.loads(data.decode('utf-8'))
     except (socket.timeout, requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout):
         cprint(f'[INFO]\tWarning: {filetype} download socket timed out ({timeout}s)', silent=silent)
     except (urllib.error.HTTPError, requests.exceptions.ConnectionError):

diff --git a/kintree/search/lcsc_api.py b/kintree/search/lcsc_api.py
@@ -132,8 +132,9 @@ def test_api() -> bool:
     ''' Test method for API '''
 
     test_success = True
+    # Exact-match only on stable identity fields. The free-text 'productIntroEn'
+    # description is edited by LCSC from time to time, so it is checked loosely below.
     expected = {
-        'productIntroEn': '25V 100pF C0G ±5% 0201 Multilayer Ceramic Capacitors MLCC - SMD/SMT ROHS',
         'productCode': 'C2181718',
         'brandNameEn': 'TDK',
         'productModel': 'C0603C0G1E101J030BA',
@@ -142,13 +143,19 @@ def test_api() -> bool:
     test_part = fetch_part_info('C2181718')
     if not test_part:
         test_success = False
-        
+
     # Check content of response
     if test_success:
         for key, value in expected.items():
-            if test_part[key] != value:
-                print(f'"{test_part[key]}" <> "{value}"')
+            actual = test_part.get(key)
+            if actual != value:
+                print(f'"{actual}" <> "{value}"')
                 test_success = False
                 break
 
+    # Loose check on the free-text description (resilient to LCSC rewording)
+    if test_success and '100pF' not in test_part.get('productIntroEn', ''):
+        print(f'"100pF" not found in "{test_part.get("productIntroEn", "")}"')
+        test_success = False
+
     return test_success