Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ services:
- data_test:/app/data
- preview_images_test:/app/preview-images
- ./detection_service:/tests/tests/detection_service
- ./web_service:/tests/tests/web_service

redis:
image: redis:latest
Expand Down
38 changes: 19 additions & 19 deletions metadata_service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,31 +84,31 @@ def on_created(filepaths: list):
item.smb_target_ids = []
update_scanneddata_database(item, {"file_status": item.status.value, "additional_smb": additional_smbs_str, "local_filepath": item.local_directory_above, "file_name": item.filename})

# Match a remote destination
# Match remote destinations in the correct order
smb_names = [item.local_directory_above] + item.additional_remote_paths

if smb_names:
placeholders = ",".join("?" for _ in smb_names)
query = f"""
# Query each SMB name individually to maintain order
for smb_name in smb_names:
query = """
SELECT onedrive_path, folder_id, drive_id
FROM smb_onedrive
WHERE smb_name IN ({placeholders})
WHERE smb_name = ?
"""
result = execute_query(query, tuple(smb_names), fetchall=True)
else:
result = []
if result:
for res in result:
item.OneDriveDestinations.append(
OneDriveDestination(
remote_file_path=res.get("onedrive_path"),
remote_folder_id=res.get("folder_id"),
remote_drive_id=res.get("drive_id")
result = execute_query(query, (smb_name,), fetchall=True)

if result:
for res in result:
item.OneDriveDestinations.append(
OneDriveDestination(
remote_file_path=res.get("onedrive_path"),
remote_folder_id=res.get("folder_id"),
remote_drive_id=res.get("drive_id")
)
)
)
logger.debug(f"Found remote destination for {res}: {res.get("onedrive_path")}")
else:
logger.warning(f"Could not find remote destination for {item.local_directory_above}")
logger.debug(f"Found remote destination for {smb_name}: {res.get('onedrive_path')}")
else:
logger.warning(f"Could not find remote destination for {smb_name}")

update_scanneddata_database(item, {'remote_filepath': ",".join([dest.remote_file_path for dest in item.OneDriveDestinations])})

logger.info(f"Waiting for {item.filename} to be a valid PDF or image file")
Expand Down
1 change: 1 addition & 0 deletions scansynclib/scansynclib/ProcessItem.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def __init__(self, remote_file_path: str, remote_folder_id: str, remote_drive_id
self.remote_directory = None
self.remote_folder_id = remote_folder_id
self.remote_drive_id = remote_drive_id
self.web_url = None # Will be set after successful upload


class ProcessItem:
Expand Down
25 changes: 16 additions & 9 deletions scansynclib/scansynclib/onedrive_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def upload_small(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool:
file_size = os.path.getsize(item.ocr_file)
if file_size > 250 * 1024 * 1024:
logger.error(f"File {item.ocr_file} is larger than 250MB, using chunked upload")
return upload(item)
return upload(item, onedriveitem)
file_size_kb = file_size / 1024
file_size_mb = file_size_kb / 1024
logger.debug(f"File size: {file_size} bytes ({file_size_kb:.2f} KB, {file_size_mb:.2f} MB)")
Expand All @@ -245,9 +245,10 @@ def upload_small(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool:
webUrl = response.json().get("webUrl")
if webUrl:
logger.debug(f"File is accessible at {webUrl}")
item.web_url.append(webUrl)
# Store the web URL in the OneDriveDestination object for later processing
onedriveitem.web_url = webUrl
# Only update the file name, web_url will be handled in batch by upload_service
update_scanneddata_database(item, {
"web_url": ",".join(item.web_url),
"file_name": response.json().get("name", item.filename)
}
)
Expand All @@ -263,16 +264,16 @@ def upload_small(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool:


@retry(stop=stop_after_attempt(3), wait=wait_random_exponential(multiplier=10, min=10, max=60))
def upload(item: ProcessItem) -> bool:
def upload(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool:
try:
logger.info(f"Uploading file {item.ocr_file} to OneDrive")
logger.info(f"Uploading file {item.ocr_file} to OneDrive: {onedriveitem.remote_file_path}")
access_token = get_access_token()
if not access_token:
logger.error("No access token available to upload file")
return False

file_size = os.path.getsize(item.ocr_file)
upload_session_url = f"https://graph.microsoft.com/v1.0/drives/{item.remote_drive_id}/items/{item.remote_folder_id}:/{item.filename}:/createUploadSession"
upload_session_url = f"https://graph.microsoft.com/v1.0/drives/{onedriveitem.remote_drive_id}/items/{onedriveitem.remote_folder_id}:/{item.filename}:/createUploadSession"

# Create an upload session
logger.debug(f"Creating upload session for {item.ocr_file} to {upload_session_url}")
Expand Down Expand Up @@ -322,9 +323,15 @@ def upload(item: ProcessItem) -> bool:
webUrl = chunk_response.json().get("webUrl")
if webUrl:
logger.debug(f"File is accessible at {webUrl}")
update_scanneddata_database(item, {"web_url": webUrl, "remote_filepath": item.remote_file_path})

logger.info(f"File {item.ocr_file} uploaded successfully to {item.remote_file_path}")
# Store the web URL in the OneDriveDestination object for later processing
onedriveitem.web_url = webUrl
# Only update the file name, web_url will be handled in batch by upload_service
update_scanneddata_database(item, {
"file_name": chunk_response.json().get("name", item.filename)
})

logger.info(f"File {item.ocr_file} uploaded successfully to {onedriveitem.remote_file_path}")
return True
Copy link

Copilot AI Jul 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return statement is inside the try block but outside the main logic flow, which means the function will always return True even if the upload fails. The return statement should be moved to the end of the successful upload logic.

Suggested change
return True

Copilot uses AI. Check for mistakes.
Comment on lines +332 to +334
Copy link

Copilot AI Jul 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function returns True here, but this return statement is outside the chunk upload loop. If the upload completes in an earlier chunk (status 201), this return statement will not be reached, potentially causing the function to return None instead of True.

Copilot uses AI. Check for mistakes.
except requests.exceptions.RequestException as e:
logger.error(f"Request exception occurred during upload: {str(e)}")
except Exception as e:
Expand Down
Loading