diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 350290d..866e9e6 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -38,6 +38,7 @@ services: - data_test:/app/data - preview_images_test:/app/preview-images - ./detection_service:/tests/tests/detection_service + - ./web_service:/tests/tests/web_service redis: image: redis:latest diff --git a/metadata_service/main.py b/metadata_service/main.py index 31b0b74..b7290f1 100644 --- a/metadata_service/main.py +++ b/metadata_service/main.py @@ -84,31 +84,31 @@ def on_created(filepaths: list): item.smb_target_ids = [] update_scanneddata_database(item, {"file_status": item.status.value, "additional_smb": additional_smbs_str, "local_filepath": item.local_directory_above, "file_name": item.filename}) - # Match a remote destination + # Match remote destinations in the correct order smb_names = [item.local_directory_above] + item.additional_remote_paths - if smb_names: - placeholders = ",".join("?" for _ in smb_names) - query = f""" + # Query each SMB name individually to maintain order + for smb_name in smb_names: + query = """ SELECT onedrive_path, folder_id, drive_id FROM smb_onedrive - WHERE smb_name IN ({placeholders}) + WHERE smb_name = ? 
""" - result = execute_query(query, tuple(smb_names), fetchall=True) - else: - result = [] - if result: - for res in result: - item.OneDriveDestinations.append( - OneDriveDestination( - remote_file_path=res.get("onedrive_path"), - remote_folder_id=res.get("folder_id"), - remote_drive_id=res.get("drive_id") + result = execute_query(query, (smb_name,), fetchall=True) + + if result: + for res in result: + item.OneDriveDestinations.append( + OneDriveDestination( + remote_file_path=res.get("onedrive_path"), + remote_folder_id=res.get("folder_id"), + remote_drive_id=res.get("drive_id") + ) ) - ) - logger.debug(f"Found remote destination for {res}: {res.get("onedrive_path")}") - else: - logger.warning(f"Could not find remote destination for {item.local_directory_above}") + logger.debug(f"Found remote destination for {smb_name}: {res.get('onedrive_path')}") + else: + logger.warning(f"Could not find remote destination for {smb_name}") + update_scanneddata_database(item, {'remote_filepath': ",".join([dest.remote_file_path for dest in item.OneDriveDestinations])}) logger.info(f"Waiting for {item.filename} to be a valid PDF or image file") diff --git a/scansynclib/scansynclib/ProcessItem.py b/scansynclib/scansynclib/ProcessItem.py index 237b3b7..c64b76a 100644 --- a/scansynclib/scansynclib/ProcessItem.py +++ b/scansynclib/scansynclib/ProcessItem.py @@ -116,6 +116,7 @@ def __init__(self, remote_file_path: str, remote_folder_id: str, remote_drive_id self.remote_directory = None self.remote_folder_id = remote_folder_id self.remote_drive_id = remote_drive_id + self.web_url = None # Will be set after successful upload class ProcessItem: diff --git a/scansynclib/scansynclib/onedrive_api.py b/scansynclib/scansynclib/onedrive_api.py index 7288ef1..3b9af6e 100644 --- a/scansynclib/scansynclib/onedrive_api.py +++ b/scansynclib/scansynclib/onedrive_api.py @@ -224,7 +224,7 @@ def upload_small(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool: file_size = 
os.path.getsize(item.ocr_file) if file_size > 250 * 1024 * 1024: logger.error(f"File {item.ocr_file} is larger than 250MB, using chunked upload") - return upload(item) + return upload(item, onedriveitem) file_size_kb = file_size / 1024 file_size_mb = file_size_kb / 1024 logger.debug(f"File size: {file_size} bytes ({file_size_kb:.2f} KB, {file_size_mb:.2f} MB)") @@ -245,9 +245,10 @@ def upload_small(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool: webUrl = response.json().get("webUrl") if webUrl: logger.debug(f"File is accessible at {webUrl}") - item.web_url.append(webUrl) + # Store the web URL in the OneDriveDestination object for later processing + onedriveitem.web_url = webUrl + # Only update the file name, web_url will be handled in batch by upload_service update_scanneddata_database(item, { - "web_url": ",".join(item.web_url), "file_name": response.json().get("name", item.filename) } ) @@ -263,16 +264,16 @@ def upload_small(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool: @retry(stop=stop_after_attempt(3), wait=wait_random_exponential(multiplier=10, min=10, max=60)) -def upload(item: ProcessItem) -> bool: +def upload(item: ProcessItem, onedriveitem: OneDriveDestination) -> bool: try: - logger.info(f"Uploading file {item.ocr_file} to OneDrive") + logger.info(f"Uploading file {item.ocr_file} to OneDrive: {onedriveitem.remote_file_path}") access_token = get_access_token() if not access_token: logger.error("No access token available to upload file") return False file_size = os.path.getsize(item.ocr_file) - upload_session_url = f"https://graph.microsoft.com/v1.0/drives/{item.remote_drive_id}/items/{item.remote_folder_id}:/{item.filename}:/createUploadSession" + upload_session_url = f"https://graph.microsoft.com/v1.0/drives/{onedriveitem.remote_drive_id}/items/{onedriveitem.remote_folder_id}:/{item.filename}:/createUploadSession" # Create an upload session logger.debug(f"Creating upload session for {item.ocr_file} to {upload_session_url}") 
@@ -322,9 +323,15 @@ def upload(item: ProcessItem) -> bool: webUrl = chunk_response.json().get("webUrl") if webUrl: logger.debug(f"File is accessible at {webUrl}") - update_scanneddata_database(item, {"web_url": webUrl, "remote_filepath": item.remote_file_path}) - - logger.info(f"File {item.ocr_file} uploaded successfully to {item.remote_file_path}") + # Store the web URL in the OneDriveDestination object for later processing + onedriveitem.web_url = webUrl + # Only update the file name, web_url will be handled in batch by upload_service + update_scanneddata_database(item, { + "file_name": chunk_response.json().get("name", item.filename) + }) + + logger.info(f"File {item.ocr_file} uploaded successfully to {onedriveitem.remote_file_path}") + return True except requests.exceptions.RequestException as e: logger.error(f"Request exception occurred during upload: {str(e)}") except Exception as e: diff --git a/tests/test_badge_generator.py b/tests/test_badge_generator.py new file mode 100644 index 0000000..a54f679 --- /dev/null +++ b/tests/test_badge_generator.py @@ -0,0 +1,371 @@ +"""Tests for the badge_generator module.""" + +import pytest +import sys +import os + +# Add paths for imports +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../scansynclib')) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../web_service/src')) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'web_service/src')) + +try: + from web_service.src.badge_generator import generate_badges +except ImportError: + # For Docker environment + sys.path.insert(0, '/tests/tests/web_service/src') + from badge_generator import generate_badges + +from scansynclib.helpers import SMB_TAG_COLORS + + +class TestBadgeGenerator: + """Test cases for badge generation functionality.""" + + @pytest.mark.parametrize("smb_target_ids, expected_count", [ + ([], 0), + ([{'id': 1}], 1), + ([{'id': 1}, {'id': 2}], 2), + ([{'id': 1}, {'id': 2}, {'id': 3}], 3), + ]) + def 
test_generate_badges_count(self, smb_target_ids, expected_count): + """Test that the correct number of badges is generated.""" + badges = generate_badges( + pdf_id=1, + smb_target_ids=smb_target_ids, + local_filepath='test.pdf' + ) + assert len(badges) == expected_count + + @pytest.mark.parametrize("local_filepath, additional_smb_names, expected_texts", [ + ('document.pdf', [], ['document.pdf']), + ('path/to/document.pdf', [], ['path/to/document.pdf']), + ('document.pdf', ['extra1.pdf'], ['document.pdf', 'extra1.pdf']), + ('zebra.pdf', ['alpha.pdf', 'beta.pdf'], ['alpha.pdf', 'beta.pdf', 'zebra.pdf']), + ]) + def test_generate_badges_alphabetical_sorting(self, local_filepath, additional_smb_names, expected_texts): + """Test that badges are sorted alphabetically by filename.""" + smb_target_ids = [{'id': i+1} for i in range(len(additional_smb_names) + 1)] + badges = generate_badges( + pdf_id=1, + smb_target_ids=smb_target_ids, + local_filepath=local_filepath, + additional_smb_names=additional_smb_names + ) + actual_texts = [badge['text'] for badge in badges] + assert actual_texts == expected_texts + + @pytest.mark.parametrize("local_filepath, expected_filename", [ + ('document.pdf', 'document.pdf'), + ('path/to/document.pdf', 'path/to/document.pdf'), + ('/full/path/to/document.pdf', '/full/path/to/document.pdf'), + ('C:\\Windows\\Path\\document.pdf', 'C:\\Windows\\Path\\document.pdf'), + ('document with spaces.pdf', 'document with spaces.pdf'), + ('document-with-dashes_and_underscores.pdf', 'document-with-dashes_and_underscores.pdf'), + ]) + def test_filename_extraction(self, local_filepath, expected_filename): + """Test that filenames are used as provided (no extraction).""" + badges = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}], + local_filepath=local_filepath + ) + assert len(badges) == 1 + assert badges[0]['text'] == expected_filename + + def test_color_consistency_same_text(self): + """Test that the same text always gets the same color.""" + 
badges1 = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}], + local_filepath='test.pdf' + ) + badges2 = generate_badges( + pdf_id=2, + smb_target_ids=[{'id': 999}], + local_filepath='test.pdf' + ) + + assert len(badges1) == 1 + assert len(badges2) == 1 + assert badges1[0]['color'] == badges2[0]['color'] + assert badges1[0]['text'] == badges2[0]['text'] + + def test_color_consistency_different_ids_same_path(self): + """Test that different IDs with same path get consistent colors.""" + badges1 = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 8}], + local_filepath='document.pdf' + ) + badges2 = generate_badges( + pdf_id=2, + smb_target_ids=[{'id': 10}], + local_filepath='document.pdf' + ) + badges3 = generate_badges( + pdf_id=3, + smb_target_ids=[{'id': 6}], + local_filepath='document.pdf' + ) + + # All should have same color for same filename + assert badges1[0]['color'] == badges2[0]['color'] == badges3[0]['color'] + assert badges1[0]['text'] == badges2[0]['text'] == badges3[0]['text'] + + def test_color_from_valid_range(self): + """Test that generated colors are from the SMB_TAG_COLORS list.""" + badges = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}, {'id': 2}, {'id': 3}], + local_filepath='test1.pdf', + additional_smb_names=['test2.pdf', 'test3.pdf'] + ) + + for badge in badges: + assert badge['color'] in SMB_TAG_COLORS + + def test_handle_empty_smb_target_ids(self): + """Test handling of empty smb_target_ids.""" + badges = generate_badges( + pdf_id=1, + smb_target_ids=[], + local_filepath='test.pdf' + ) + assert len(badges) == 0 + + def test_empty_string_local_filepath(self): + """Test handling of empty string local_filepath.""" + badges = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}], + local_filepath='' + ) + + assert len(badges) == 1 + assert badges[0]['text'] == 'N/A' # Empty strings become 'N/A' + assert badges[0]['color'] in SMB_TAG_COLORS + + def test_none_local_filepath(self): + """Test handling of None 
local_filepath.""" + badges = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}], + local_filepath=None + ) + + assert len(badges) == 1 + assert badges[0]['text'] == 'N/A' + # Should use 'N/A' for None values with deterministic hash + try: + from web_service.src.badge_generator import _deterministic_hash + except ImportError: + # For Docker environment + from badge_generator import _deterministic_hash + + expected_hash = _deterministic_hash('N/A') % len(SMB_TAG_COLORS) + expected_color = SMB_TAG_COLORS[expected_hash] + assert badges[0]['color'] == expected_color + + def test_badge_structure(self): + """Test that each badge has the correct structure.""" + badges = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}], + local_filepath='test.pdf' + ) + + assert len(badges) == 1 + badge = badges[0] + + # Check required keys + required_keys = ['id', 'text', 'color', 'url', 'title'] + for key in required_keys: + assert key in badge + + # Check types + assert isinstance(badge['text'], str) + assert isinstance(badge['color'], str) + assert isinstance(badge['id'], str) + + # Check color format (should be hex color) + assert badge['color'].startswith('#') + assert len(badge['color']) == 7 # #RRGGBB format + + def test_web_urls_and_remote_paths(self): + """Test that web URLs and remote paths are properly assigned.""" + web_urls = ['http://example.com/file1', 'http://example.com/file2'] + remote_paths = ['remote/path1', 'remote/path2'] + + badges = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}, {'id': 2}], + local_filepath='test.pdf', + additional_smb_names=['extra.pdf'], + web_urls=web_urls, + remote_paths=remote_paths + ) + + assert len(badges) == 2 + # Find badges by text to handle alphabetical sorting + test_badge = next(b for b in badges if b['text'] == 'test.pdf') + extra_badge = next(b for b in badges if b['text'] == 'extra.pdf') + + assert test_badge['url'] == web_urls[0] + assert test_badge['title'] == remote_paths[0] + assert extra_badge['url'] 
== web_urls[1] + assert extra_badge['title'] == remote_paths[1] + + def test_badge_id_format(self): + """Test that badge IDs follow the expected format.""" + badges = generate_badges( + pdf_id=123, + smb_target_ids=[{'id': 1}, {'id': 2}], + local_filepath='test.pdf', + additional_smb_names=['extra.pdf'] + ) + + assert len(badges) == 2 + + # Check that one badge has the main PDF format and others have target format + badge_ids = [badge['id'] for badge in badges] + assert any(bid == '123_pdf_smb' for bid in badge_ids) + assert any(bid.startswith('123_badge_target_') for bid in badge_ids) + + def test_deterministic_color_assignment(self): + """Test that color assignment is deterministic across multiple calls.""" + badges1 = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}, {'id': 2}], + local_filepath='test1.pdf', + additional_smb_names=['test2.pdf'] + ) + badges2 = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}, {'id': 2}], + local_filepath='test1.pdf', + additional_smb_names=['test2.pdf'] + ) + badges3 = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}, {'id': 2}], + local_filepath='test1.pdf', + additional_smb_names=['test2.pdf'] + ) + + # Results should be identical + assert badges1 == badges2 == badges3 + + @pytest.mark.parametrize("special_chars_filename", [ + 'file with spaces.pdf', + 'file-with-dashes.pdf', + 'file_with_underscores.pdf', + 'file.with.dots.pdf', + 'file(with)parentheses.pdf', + 'file[with]brackets.pdf', + 'file{with}braces.pdf', + 'file@with#special$.pdf', + ]) + def test_special_characters_in_filename(self, special_chars_filename): + """Test handling of filenames with special characters.""" + badges = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}], + local_filepath=special_chars_filename # Direct filename, not in path + ) + + assert len(badges) == 1 + assert badges[0]['text'] == special_chars_filename + assert badges[0]['color'] in SMB_TAG_COLORS + + def test_additional_smb_names_color_consistency(self): + 
"""Test that additional SMB names get consistent colors based on their text.""" + badges1 = generate_badges( + pdf_id=1, + smb_target_ids=[{'id': 1}, {'id': 2}], + local_filepath='main.pdf', + additional_smb_names=['extra.pdf'] + ) + badges2 = generate_badges( + pdf_id=2, + smb_target_ids=[{'id': 10}, {'id': 20}], + local_filepath='different.pdf', + additional_smb_names=['extra.pdf'] # Same additional name + ) + + # Find extra.pdf badges in both results + extra1 = next(b for b in badges1 if b['text'] == 'extra.pdf') + extra2 = next(b for b in badges2 if b['text'] == 'extra.pdf') + + # Should have same color despite different target IDs + assert extra1['color'] == extra2['color'] + + def test_deterministic_hash_consistency(self): + """Test that the deterministic hash function provides consistent results.""" + # Import the internal hash function for testing + try: + from web_service.src.badge_generator import _deterministic_hash + except ImportError: + # For Docker environment + from badge_generator import _deterministic_hash + + test_strings = [ + 'test.pdf', + 'document with spaces.pdf', + 'file-with-dashes.pdf', + 'special@chars#file$.pdf', + 'unicode_file_äöü.pdf', + '' + ] + + for test_string in test_strings: + # Call hash function multiple times + hash1 = _deterministic_hash(test_string) + hash2 = _deterministic_hash(test_string) + hash3 = _deterministic_hash(test_string) + + # All results should be identical + assert hash1 == hash2 == hash3, f"Hash inconsistency for '{test_string}'" + + # Hash should be non-negative + assert hash1 >= 0, f"Hash should be non-negative for '{test_string}'" + + def test_color_consistency_across_calls(self): + """Test that badge colors remain consistent across multiple function calls.""" + test_data = [ + ('document.pdf', ['extra1.pdf', 'extra2.pdf']), + ('another_file.pdf', ['additional.pdf']), + ('simple.pdf', []) + ] + + for local_filepath, additional_names in test_data: + # Generate badges multiple times + smb_target_ids = 
[{'id': i+1} for i in range(len(additional_names) + 1)] + + badges1 = generate_badges( + pdf_id=1, + smb_target_ids=smb_target_ids, + local_filepath=local_filepath, + additional_smb_names=additional_names + ) + + badges2 = generate_badges( + pdf_id=2, # Different PDF ID + smb_target_ids=smb_target_ids, + local_filepath=local_filepath, + additional_smb_names=additional_names + ) + + badges3 = generate_badges( + pdf_id=3, # Another different PDF ID + smb_target_ids=smb_target_ids, + local_filepath=local_filepath, + additional_smb_names=additional_names + ) + + # Colors should be consistent across all calls for same text + assert len(badges1) == len(badges2) == len(badges3) + + for i in range(len(badges1)): + assert badges1[i]['text'] == badges2[i]['text'] == badges3[i]['text'] + assert badges1[i]['color'] == badges2[i]['color'] == badges3[i]['color'] diff --git a/upload_service/main.py b/upload_service/main.py index d1e0c24..ebdd255 100644 --- a/upload_service/main.py +++ b/upload_service/main.py @@ -49,6 +49,20 @@ def start_processing(item: ProcessItem): update_scanneddata_database(item, {"file_status": item.status.value}) res = upload_small(item, onedriveitem) results.append(res) + + # Collect all web URLs in the correct order after all uploads are completed + web_urls = [] + for onedriveitem in item.OneDriveDestinations: + if hasattr(onedriveitem, 'web_url') and onedriveitem.web_url: + web_urls.append(onedriveitem.web_url) + else: + web_urls.append("") # Empty placeholder to maintain order + + # Update the database with all web URLs in the correct order + if web_urls: + update_scanneddata_database(item, {'web_url': ",".join(web_urls)}) + logger.debug(f"Updated web URLs in correct order: {web_urls}") + res = all(results) if res is False: logger.error(f"Failed to upload {item.ocr_file}") diff --git a/web_service/src/badge_generator.py b/web_service/src/badge_generator.py new file mode 100644 index 0000000..b275c58 --- /dev/null +++ 
b/web_service/src/badge_generator.py @@ -0,0 +1,115 @@ +""" +Unified badge generation for both dashboard and live updates. +This ensures consistent badge colors across all scenarios. +""" + +from scansynclib.helpers import SMB_TAG_COLORS +from scansynclib.logging import logger + + +def _deterministic_hash(text): + """ + Create a deterministic hash for consistent color assignment across Python restarts. + Unlike Python's built-in hash(), this function always returns the same value + for the same input string, regardless of hash randomization. + """ + if not text: + return 0 + + hash_value = 0 + for char in text: + hash_value = (hash_value * 31 + ord(char)) & 0x7FFFFFFF + return hash_value + + +def generate_badges( + pdf_id, + smb_target_ids, + local_filepath, + additional_smb_names=None, + web_urls=None, + remote_paths=None +): + """ + Generate badges with consistent colors based on SMB target IDs. + + Args: + pdf_id: PDF database ID + smb_target_ids: List of SMB target dictionaries with 'id' key + local_filepath: Main local file path name + additional_smb_names: List of additional SMB names + web_urls: List of web URLs (optional) + remote_paths: List of remote file paths (optional) + + Returns: + List of badge dictionaries with id, text, color, url, title + """ + badges = [] + + # Ensure defaults + if additional_smb_names is None: + additional_smb_names = [] + if web_urls is None: + web_urls = [] + if remote_paths is None: + remote_paths = [] + + # Create all badges first, then sort alphabetically + if isinstance(smb_target_ids, list) and smb_target_ids: + # Create badge data for all targets + all_badge_data = [] + + # Add main badge data + if smb_target_ids: + main_target = smb_target_ids[0] + target_id = main_target.get('id') if isinstance(main_target, dict) else main_target + # Use deterministic hash for consistent colors across application restarts + text_hash = _deterministic_hash(local_filepath or 'N/A') % len(SMB_TAG_COLORS) + color = 
SMB_TAG_COLORS[text_hash] + + all_badge_data.append({ + "target_id": target_id, + "text": local_filepath or 'N/A', + "color": color, + "url": web_urls[0] if web_urls else None, + "title": remote_paths[0] if remote_paths else 'Open in OneDrive', + "is_main": True + }) + + # Add additional badge data + for i, target in enumerate(smb_target_ids[1:], 1): + target_id = target.get('id') if isinstance(target, dict) else target + text = additional_smb_names[i-1] if i-1 < len(additional_smb_names) else 'N/A' + # Use deterministic hash for consistent colors across application restarts + text_hash = _deterministic_hash(text) % len(SMB_TAG_COLORS) + color = SMB_TAG_COLORS[text_hash] + + all_badge_data.append({ + "target_id": target_id, + "text": text, + "color": color, + "url": web_urls[i] if i < len(web_urls) else None, + "title": remote_paths[i] if i < len(remote_paths) else 'Open in OneDrive', + "is_main": False + }) # Sort all badges alphabetically by text + sorted_badge_data = sorted(all_badge_data, key=lambda x: (x["text"] or "").lower()) + + # Create actual badge objects in alphabetical order + for badge_data in sorted_badge_data: + if badge_data["is_main"]: + badge_id = f"{pdf_id}_pdf_smb" + else: + # Use target_id for consistent badge IDs across sorting + badge_id = f"{pdf_id}_badge_target_{badge_data['target_id']}" + + badge = { + "id": badge_id, + "text": badge_data["text"], + "color": badge_data["color"], + "url": badge_data["url"], + "title": badge_data["title"] + } + badges.append(badge) + + logger.debug(f"Generated {len(badges)} badges for PDF {pdf_id} with sorted targets: {badges}") + return badges diff --git a/web_service/src/main.py b/web_service/src/main.py index 593ffd3..a274290 100644 --- a/web_service/src/main.py +++ b/web_service/src/main.py @@ -54,6 +54,25 @@ def rabbitmq_listener(): def callback(ch, method, properties, body): if connected_clients > 0: item: ProcessItem = pickle.loads(body) + + # Import unified badge generator + from badge_generator 
import generate_badges + + # Prepare data for unified badge generation + web_urls = [dest.web_url for dest in item.OneDriveDestinations if dest.web_url] if item.OneDriveDestinations else [] + remote_paths = [dest.remote_file_path for dest in item.OneDriveDestinations] if item.OneDriveDestinations else [] + additional_smb = item.additional_remote_paths or [] + + # Generate badges using unified function + badges = generate_badges( + pdf_id=item.db_id, + smb_target_ids=item.smb_target_ids or [], + local_filepath=item.local_directory_above, + additional_smb_names=additional_smb, + web_urls=web_urls, + remote_paths=remote_paths + ) + payload = dict( id=item.db_id, file_name=item.filename, @@ -63,11 +82,12 @@ def callback(ch, method, properties, body): remote_filepaths=[dest.remote_file_path for dest in item.OneDriveDestinations] if item.OneDriveDestinations else [], pdf_pages=int(item.pdf_pages) if item.pdf_pages is not None else 0, status_progressbar=int(StatusProgressBar.get_progress(item.status)), - web_url=item.web_url, + web_url=[dest.web_url for dest in item.OneDriveDestinations if dest.web_url] if item.OneDriveDestinations else [], smb_target_ids=item.smb_target_ids, additional_smb=item.additional_remote_paths, currently_uploading=item.current_uploading, current_upload_target=item.current_upload_target, + badges=badges, # Add the generated badges ) payload["dashboard_data"] = get_dashboard_info() # Nur bei Bedarf abrufen sse_queue.put(json.dumps(payload, default=str)) # Ensure all objects are serializable diff --git a/web_service/src/routes/dashboard.py b/web_service/src/routes/dashboard.py index 12cfe2e..35145c3 100644 --- a/web_service/src/routes/dashboard.py +++ b/web_service/src/routes/dashboard.py @@ -86,24 +86,51 @@ def index(): latest_timestamp_processing = None latest_timestamp_completed = None - # Match additional SMB targets + # Match additional SMB targets and create smb_target_ids structure new_pdfs = [] for pdf in pdfs: pdf = dict(pdf) # Make mutable 
+ # Build smb_target_ids structure for consistent badge generation + smb_target_ids = [] + + # Add main SMB target (from local_filepath) + if pdf.get('smb_target_id'): + smb_target_ids.append({'id': pdf['smb_target_id']}) + + # Process additional SMB targets names = [s.strip() for s in (pdf.get('smb_additional_target_ids') or '').split(',') if s.strip()] + additional_target_ids = [] + additional_names = [] + if names: placeholders = ','.join('?' * len(names)) rows = db.execute( - f"SELECT id FROM smb_onedrive WHERE smb_name IN ({placeholders})", + f"SELECT id, smb_name FROM smb_onedrive WHERE smb_name IN ({placeholders})", names ).fetchall() - # Extract IDs - matched_ids = [str(row['id']) for row in rows] - matched_ids.reverse() - pdf['smb_additional_target_ids'] = ','.join(matched_ids) + + # Maintain the original order of names + for name in names: + for row in rows: + if row['smb_name'] == name: + additional_target_ids.append(row['id']) + additional_names.append(row['smb_name']) + break + + # Store matched IDs and names in correct order + pdf['smb_additional_target_ids'] = ','.join(str(id) for id in additional_target_ids) + pdf['additional_smb'] = additional_names + + # Add additional targets to smb_target_ids (maintaining order) + for target_id in additional_target_ids: + smb_target_ids.append({'id': target_id}) else: pdf['smb_additional_target_ids'] = '' + pdf['additional_smb'] = [] + + # Set the smb_target_ids structure + pdf['smb_target_ids'] = smb_target_ids new_pdfs.append(pdf) pdfs = new_pdfs @@ -158,6 +185,49 @@ def index(): except Exception: logger.exception(f"Failed setting progressbar for {pdf['id']}.") + try: + # Import the unified badge generator + from badge_generator import generate_badges + + # Generate badges with complete information server-side + smb_target_ids = pdf.get('smb_target_ids', []) + + # Parse web_url and remote_filepath + web_urls = pdf.get('web_url', []) + if isinstance(web_urls, str): + web_urls = [url.strip() for url in 
web_urls.split(',') if url.strip()] + elif not isinstance(web_urls, list): + web_urls = [] + + remote_paths = pdf.get('remote_filepath', []) + if isinstance(remote_paths, str): + remote_paths = [path.strip() for path in remote_paths.split(',') if path.strip()] + elif not isinstance(remote_paths, list): + remote_paths = [] + + # Get additional SMB names + additional_smb = pdf.get('additional_smb', []) + if isinstance(additional_smb, str): + additional_smb = [name.strip() for name in additional_smb.split(',') if name.strip()] + elif not isinstance(additional_smb, list): + additional_smb = [] + + # Generate badges using unified function + badges = generate_badges( + pdf_id=pdf['id'], + smb_target_ids=smb_target_ids, + local_filepath=pdf.get('local_filepath', 'N/A'), + additional_smb_names=additional_smb, + web_urls=web_urls, + remote_paths=remote_paths + ) + + pdf['badges'] = badges + + except Exception as ex: + logger.exception(f"Failed setting badges for {pdf['id']}. {ex}") + pdf['badges'] = [] + return render_template('dashboard.html', pdfs=pdfs_dicts, total_pages=total_pages, diff --git a/web_service/src/static/js/dashboard.js b/web_service/src/static/js/dashboard.js index c8eb0cf..86e3d5f 100644 --- a/web_service/src/static/js/dashboard.js +++ b/web_service/src/static/js/dashboard.js @@ -3,6 +3,18 @@ // Set to track which card IDs have been displayed to avoid race condition issues let displayedCardIds = new Set(); +// Global function to get consistent badge colors based on SMB target ID +function getBadgeColor(id) { + if (typeof id !== 'number' || !Number.isFinite(id)) { + return '#6c757d'; + } + const idx = id - 1; + if (Array.isArray(smb_tag_colors) && smb_tag_colors.length > 0 && idx >= 0) { + return smb_tag_colors[idx % smb_tag_colors.length]; + } + return '#6c757d'; +} + document.addEventListener('DOMContentLoaded', function() { document.getElementById('top-progress-bar').style.display = 'block'; console.log("Creating " + pdfsData.length + " pdf cards."); @@ 
-104,64 +116,44 @@ function updateCard(updateData) { console.error(`Error updating image: ${error.message}`); } - // Update Local Filepath + // Update Local Filepath and Badges try { - if (updateData.local_filepath && updateData.local_filepath.trim() !== "") { - const element = document.getElementById(updateData.id + "_pdf_smb"); - element.textContent = updateData.local_filepath; - element.innerHTML += "
"; - // Get the "id" value from the first object in smb_target_ids array - const firstid = Array.isArray(updateData.smb_target_ids) && updateData.smb_target_ids.length > 0 - ? updateData.smb_target_ids[0]?.id - : 1; // Default to 1 if not set - const idx = (firstid ? firstid - 1 : -1); - // console.log(`Using SMB tag color index: ${idx} for smb_target_id: ${updateData.smb_target_id}`); - let bgColor; - if (Array.isArray(smb_tag_colors) && smb_tag_colors.length > 0 && Number.isInteger(idx) && idx >= 0) { - bgColor = smb_tag_colors[idx % smb_tag_colors.length]; - } else { - bgColor = '#6c757d'; - } - const textColor = getContrastYIQ(bgColor); - element.style.color = textColor; - element.style.backgroundColor = bgColor; - } - } catch (error) { - console.error(`Error updating local filepath: ${error.message}`); - } - - // Update Cloud Link - try { - if (Array.isArray(updateData.web_url) && updateData.web_url.length > 0) { - const smbBadges = cardElement.querySelectorAll('.smb-badge'); - smbBadges.forEach(badge => { - if (Array.isArray(updateData.web_url)) { - const badgeIndex = Array.from(smbBadges).indexOf(badge); - const url = updateData.web_url[badgeIndex]; - if (url) { + // Update all badges using server-side generated badge data + const cardElement = document.getElementById(updateData.id + '_pdf_card'); + if (cardElement && updateData.badges && Array.isArray(updateData.badges)) { + const badgesContainer = cardElement.querySelector('.smb-badges-container'); + if (badgesContainer) { + // Clear existing badges + badgesContainer.innerHTML = ''; + + // Add badges using server-generated data in original order + updateData.badges.forEach((badgeData) => { + const badge = document.createElement('span'); + badge.className = 'badge align-middle smb-badge'; + badge.id = badgeData.id; + badge.style.backgroundColor = badgeData.color; + badge.style.setProperty('background-color', badgeData.color, 'important'); + badge.style.color = getContrastYIQ(badgeData.color); + 
badge.textContent = badgeData.text || 'N/A'; + + if (badgeData.url) { badge.style.cursor = 'pointer'; - badge.onclick = () => window.open(url, '_blank'); - badge.title = 'Open in OneDrive'; - } else { - badge.onclick = null; - badge.style.cursor = ''; - badge.title = ''; + badge.onclick = () => window.open(badgeData.url, '_blank'); + badge.title = badgeData.title || 'Open in OneDrive'; } - } else if (typeof updateData.web_url === 'string') { - badge.style.cursor = 'pointer'; - badge.onclick = () => window.open(updateData.web_url, '_blank'); - badge.title = 'Open in OneDrive'; - } else { - badge.onclick = null; - badge.style.cursor = ''; - badge.title = ''; - } - }); + + badgesContainer.appendChild(badge); + }); + } } } catch (error) { - console.error(`Error updating cloud link: ${error.message}`); + console.error(`Error updating local filepath and badges: ${error.message}`); } + // Update Cloud Link (URLs are now handled in badges, but keep for backward compatibility) + // This section is now disabled since server-side badges are being used + // The server-generated badges already contain all URL and click handler information + // Update File Status try { @@ -350,65 +342,100 @@ function addPdfCard(pdfData) { badgesContainer.style.alignItems = 'center'; smbContainer.appendChild(badgesContainer); - // Helper to choose color - const getBadgeColor = (id) => { - const idx = (id ? 
id - 1 : -1); - if (Array.isArray(smb_tag_colors) && smb_tag_colors.length > 0 && idx >= 0) { - return smb_tag_colors[idx % smb_tag_colors.length]; - } - return '#6c757d'; - }; - - // Helper to create badge - const createBadge = (text, color, url, remote_filepath) => { + // Helper to create badge from server data + const createBadgeFromData = (badgeData) => { const badge = document.createElement('span'); badge.className = 'badge align-middle smb-badge'; - badge.style.backgroundColor = color; - badge.style.color = getContrastYIQ(color); - badge.textContent = text || 'N/A'; - if (url) { + badge.id = badgeData.id; + badge.style.backgroundColor = badgeData.color; + badge.style.setProperty('background-color', badgeData.color, 'important'); + badge.style.color = getContrastYIQ(badgeData.color); + badge.textContent = badgeData.text || 'N/A'; + + if (badgeData.url) { badge.style.cursor = 'pointer'; - badge.onclick = () => window.open(url, '_blank'); - badge.title = remote_filepath || 'Open in OneDrive'; + badge.onclick = () => window.open(badgeData.url, '_blank'); + badge.title = badgeData.title || 'Open in OneDrive'; } - + return badge; }; - // Add additional SMB badges - const additionalIds = (pdfData.smb_additional_target_ids || '') - .split(',') - .map(s => parseInt(s.trim(), 10)) - .filter(id => !isNaN(id)); - - const urls = Array.isArray(pdfData.web_url) - ? pdfData.web_url - : (pdfData.web_url || '').split(',').map(s => s.trim()).filter(Boolean); - - // Parse remote_filepaths as an array, splitting by comma if it's a string - const remote_filepaths = typeof pdfData.remote_filepath === 'string' - ? pdfData.remote_filepath.split(',').map(s => s.trim()).filter(Boolean) - : (Array.isArray(pdfData.remote_filepath) ? 
pdfData.remote_filepath : []); - additionalIds.forEach((id, i) => { - const name = (pdfData.additional_smb || '').split(',')[i]?.trim() || 'N/A'; - const color = getBadgeColor(id); - badgesContainer.appendChild(createBadge(name, color, urls[i + 1]?.trim(), remote_filepaths[i + 1]?.trim())); - }); - - // Add main SMB badge - const mainColor = getBadgeColor(pdfData.smb_target_id); - const mainName = pdfData.local_filepath || 'N/A'; - const mainBadge = createBadge(mainName, mainColor, urls[0]?.trim(), remote_filepaths[0]?.trim()); - mainBadge.id = `${pdfData.id}_pdf_smb`; - badgesContainer.appendChild(mainBadge); - - // Add additional SMB targets - if (Array.isArray(pdfData.smb_target_ids) && pdfData.smb_target_ids.length > 0) { - pdfData.smb_target_ids.forEach((smbTarget, index) => { - if (index === 0) return; - const smbBadge = createBadge(pdfData.additional_smb[index - 1], getBadgeColor(smbTarget.id)); - badgesContainer.appendChild(smbBadge); + // Use server-generated badges if available + if (pdfData.badges && Array.isArray(pdfData.badges)) { + pdfData.badges.forEach((badgeData) => { + const badge = createBadgeFromData(badgeData); + badgesContainer.appendChild(badge); }); + } else { + console.log(`[INITIAL LOAD] No server badges found for PDF ${pdfData.id}, using fallback logic`); + // Fallback to old client-side generation logic + const urls = Array.isArray(pdfData.web_url) + ? pdfData.web_url + : (pdfData.web_url || '').split(',').map(s => s.trim()).filter(Boolean); + + // Parse remote_filepaths as an array, splitting by comma if it's a string + const remote_filepaths = typeof pdfData.remote_filepath === 'string' + ? pdfData.remote_filepath.split(',').map(s => s.trim()).filter(Boolean) + : (Array.isArray(pdfData.remote_filepath) ? 
pdfData.remote_filepath : []);
+
+        // Helper to create badge (fallback)
+        const createBadge = (text, color, url, remote_filepath) => {
+            const badge = document.createElement('span');
+            badge.className = 'badge align-middle smb-badge';
+            badge.style.backgroundColor = color;
+            badge.style.color = getContrastYIQ(color);
+            badge.textContent = text || 'N/A';
+            if (url) {
+                badge.style.cursor = 'pointer';
+                badge.onclick = () => window.open(url, '_blank');
+                badge.title = remote_filepath || 'Open in OneDrive';
+            }
+            return badge;
+        };
+        const getBadgeColor = (id) => { const idx = (id ? id - 1 : -1); return (Array.isArray(smb_tag_colors) && smb_tag_colors.length > 0 && idx >= 0) ? smb_tag_colors[idx % smb_tag_colors.length] : '#6c757d'; }; // restored locally: this patch deletes the top-level getBadgeColor helper
+        // Use the new smb_target_ids structure for consistent badge creation
+        if (Array.isArray(pdfData.smb_target_ids) && pdfData.smb_target_ids.length > 0) {
+            // Add main SMB badge (first in smb_target_ids)
+            const mainColor = getBadgeColor(pdfData.smb_target_ids[0]?.id);
+            const mainName = pdfData.local_filepath || 'N/A';
+            const mainBadge = createBadge(mainName, mainColor, urls[0]?.trim(), remote_filepaths[0]?.trim());
+            mainBadge.id = `${pdfData.id}_pdf_smb`;
+            badgesContainer.appendChild(mainBadge);
+
+            // Add additional SMB badges (rest of smb_target_ids)
+            pdfData.smb_target_ids.forEach((smbTarget, index) => {
+                if (index === 0) return; // Skip main badge
+                const additionalName = pdfData.additional_smb?.[index - 1] || 'N/A';
+                const additionalColor = getBadgeColor(smbTarget.id);
+                const additionalBadge = createBadge(
+                    additionalName,
+                    additionalColor,
+                    urls[index]?.trim(),
+                    remote_filepaths[index]?.trim()
+                );
+                badgesContainer.appendChild(additionalBadge);
+            });
+        } else {
+            // Fallback to old structure if smb_target_ids is not available
+            const mainColor = getBadgeColor(pdfData.smb_target_id);
+            const mainName = pdfData.local_filepath || 'N/A';
+            const mainBadge = createBadge(mainName, mainColor, urls[0]?.trim(), remote_filepaths[0]?.trim());
+            mainBadge.id = `${pdfData.id}_pdf_smb`;
+            badgesContainer.appendChild(mainBadge);
+
+            // Add additional badges from old structure
+            const additionalIds = (pdfData.smb_additional_target_ids || '')
+                .split(',')
+                .map(s => parseInt(s.trim(), 10))
+                .filter(id => !isNaN(id));
+
+            additionalIds.forEach((id, i) => {
+                const name = (pdfData.additional_smb || '').split(',')[i]?.trim() || 'N/A';
+                const color = getBadgeColor(id);
+                badgesContainer.appendChild(createBadge(name, color, urls[i + 1]?.trim(), remote_filepaths[i + 1]?.trim()));
+            });
+        }
+    }
 
     let statusText = document.createElement('span');