maxi07 · maxi07 · Jul 27, 2025 · Jul 27, 2025 · Jul 27, 2025 · Jul 27, 2025
diff --git a/docker-compose.test.yml b/docker-compose.test.yml
@@ -38,6 +38,7 @@ services:
       - data_test:/app/data
       - preview_images_test:/app/preview-images
       - ./detection_service:/tests/tests/detection_service
+      - ./web_service:/tests/tests/web_service
 
   redis:
     image: redis:latest

diff --git a/metadata_service/main.py b/metadata_service/main.py
@@ -84,31 +84,31 @@ def on_created(filepaths: list):
         item.smb_target_ids = []
     update_scanneddata_database(item, {"file_status": item.status.value, "additional_smb": additional_smbs_str, "local_filepath": item.local_directory_above, "file_name": item.filename})
 
-    # Match a remote destination
+    # Match remote destinations in smb_names order: the local directory first, then any additional remote paths
     smb_names = [item.local_directory_above] + item.additional_remote_paths
 
-    if smb_names:
-        placeholders = ",".join("?" for _ in smb_names)
-        query = f"""
+    # Query each SMB name individually to maintain order
+    for smb_name in smb_names:
+        query = """
             SELECT onedrive_path, folder_id, drive_id
             FROM smb_onedrive
-            WHERE smb_name IN ({placeholders})
+            WHERE smb_name = ?
         """
-        result = execute_query(query, tuple(smb_names), fetchall=True)
-    else:
-        result = []
-    if result:
-        for res in result:
-            item.OneDriveDestinations.append(
-                OneDriveDestination(
-                    remote_file_path=res.get("onedrive_path"),
-                    remote_folder_id=res.get("folder_id"),
-                    remote_drive_id=res.get("drive_id")
+        result = execute_query(query, (smb_name,), fetchall=True)
+
+        if result:
+            for res in result:
+                item.OneDriveDestinations.append(
+                    OneDriveDestination(
+                        remote_file_path=res.get("onedrive_path"),
+                        remote_folder_id=res.get("folder_id"),
+                        remote_drive_id=res.get("drive_id")
+                    )
                 )
-            )
-            logger.debug(f"Found remote destination for {res}: {res.get("onedrive_path")}")
-    else:
-        logger.warning(f"Could not find remote destination for {item.local_directory_above}")
+            logger.debug(f"Found remote destination for {smb_name}: {res.get('onedrive_path')}")
+        else:
+            logger.warning(f"Could not find remote destination for {smb_name}")
+
     update_scanneddata_database(item, {'remote_filepath': ",".join([dest.remote_file_path for dest in item.OneDriveDestinations])})
 
     logger.info(f"Waiting for {item.filename} to be a valid PDF or image file")

diff --git a/scansynclib/scansynclib/ProcessItem.py b/scansynclib/scansynclib/ProcessItem.py
@@ -116,6 +116,7 @@ def __init__(self, remote_file_path: str, remote_folder_id: str, remote_drive_id
         self.remote_directory = None
         self.remote_folder_id = remote_folder_id
         self.remote_drive_id = remote_drive_id
+        self.web_url = None  # Will be set after successful upload
 
 
 class ProcessItem:

diff --git a/scansynclib/scansynclib/onedrive_api.py b/scansynclib/scansynclib/onedrive_api.py
@@ -224,7 +224,7 @@ def upload_small(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool:
         file_size = os.path.getsize(item.ocr_file)
         if file_size > 250 * 1024 * 1024:
             logger.error(f"File {item.ocr_file} is larger than 250MB, using chunked upload")
-            return upload(item)
+            return upload(item, onedriveitem)
         file_size_kb = file_size / 1024
         file_size_mb = file_size_kb / 1024
         logger.debug(f"File size: {file_size} bytes ({file_size_kb:.2f} KB, {file_size_mb:.2f} MB)")
@@ -245,9 +245,10 @@ def upload_small(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool:
             webUrl = response.json().get("webUrl")
             if webUrl:
                 logger.debug(f"File is accessible at {webUrl}")
-                item.web_url.append(webUrl)
+                # Store the web URL in the OneDriveDestination object for later processing
+                onedriveitem.web_url = webUrl
+                # Only update the file name here; web_url will be handled in batch by upload_service
                 update_scanneddata_database(item, {
-                        "web_url": ",".join(item.web_url),
                         "file_name": response.json().get("name", item.filename)
                     }
                 )
@@ -263,16 +264,16 @@ def upload_small(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool:
 
 
 @retry(stop=stop_after_attempt(3), wait=wait_random_exponential(multiplier=10, min=10, max=60))
-def upload(item: ProcessItem) -> bool:
+def upload(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool:
     try:
-        logger.info(f"Uploading file {item.ocr_file} to OneDrive")
+        logger.info(f"Uploading file {item.ocr_file} to OneDrive: {onedriveitem.remote_file_path}")
         access_token = get_access_token()
         if not access_token:
             logger.error("No access token available to upload file")
             return False
 
         file_size = os.path.getsize(item.ocr_file)
-        upload_session_url = f"https://graph.microsoft.com/v1.0/drives/{item.remote_drive_id}/items/{item.remote_folder_id}:/{item.filename}:/createUploadSession"
+        upload_session_url = f"https://graph.microsoft.com/v1.0/drives/{onedriveitem.remote_drive_id}/items/{onedriveitem.remote_folder_id}:/{item.filename}:/createUploadSession"
 
         # Create an upload session
         logger.debug(f"Creating upload session for {item.ocr_file} to {upload_session_url}")
@@ -322,9 +323,15 @@ def upload(item: ProcessItem) -> bool:
                     webUrl = chunk_response.json().get("webUrl")
                     if webUrl:
                         logger.debug(f"File is accessible at {webUrl}")
-                        update_scanneddata_database(item, {"web_url": webUrl, "remote_filepath": item.remote_file_path})
-
-        logger.info(f"File {item.ocr_file} uploaded successfully to {item.remote_file_path}")
+                        # Store the web URL in the OneDriveDestination object for later processing
+                        onedriveitem.web_url = webUrl
+                        # Only update the file name here; web_url will be handled in batch by upload_service
+                        update_scanneddata_database(item, {
+                            "file_name": chunk_response.json().get("name", item.filename)
+                        })
+
+        logger.info(f"File {item.ocr_file} uploaded successfully to {onedriveitem.remote_file_path}")
+        return True
-        return True
-        return True
     except requests.exceptions.RequestException as e:
         logger.error(f"Request exception occurred during upload: {str(e)}")
     except Exception as e: