Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 43 additions & 1 deletion src/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import json
import asyncio
import requests
from pathlib import Path
from fastapi import FastAPI, Form, HTTPException, Query, Request
from fastapi.responses import HTMLResponse, JSONResponse
Expand Down Expand Up @@ -134,7 +135,7 @@ async def select(
local_image_paths = download_images(selected_images, download_path)

images_data = [
(path, os.path.basename(path))
(f"/images/{os.path.basename(path)}", os.path.basename(path))
for path in local_image_paths
]

Expand Down Expand Up @@ -292,6 +293,47 @@ async def error_page(request: Request, message: str = Query(...)):
})


@app.get("/debug-api", response_class=HTMLResponse)
async def debug_api(request: Request):
    """Render a diagnostics page for the Google Custom Search configuration.

    Reads ``GOOGLE_API_KEY`` and ``SEARCH_ENGINE_ID`` from the environment,
    reports whether each one is set and, when both are present, issues a
    single one-result image search to show whether the credentials work.

    Args:
        request: Incoming request, passed through to the template context.

    Returns:
        The rendered ``debug.html`` template. Its context holds
        ``debug_info`` and ``test_result``; ``test_result`` is ``None`` when
        either credential is missing.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    search_engine_id = os.getenv("SEARCH_ENGINE_ID")

    debug_info = {
        "api_key_set": bool(api_key),
        # NOTE(review): this exposes the first 10 characters of the key to
        # anyone who can reach the endpoint -- confirm that is acceptable.
        "api_key_preview": f"{api_key[:10]}..." if api_key else "NOT SET",
        "search_engine_id_set": bool(search_engine_id),
        "search_engine_id": search_engine_id,
    }

    # Try to make a simple API call
    test_result = None
    if api_key and search_engine_id:

        def _probe():
            # ``params=`` lets requests URL-encode every value instead of
            # splicing raw strings into the query string.
            return requests.get(
                "https://www.googleapis.com/customsearch/v1",
                params={
                    "q": "test",
                    "searchType": "image",
                    "key": api_key,
                    "cx": search_engine_id,
                    "num": 1,
                },
                timeout=5,
            )

        try:
            # requests is blocking; run it in the default executor so the
            # event loop keeps serving other requests during the probe.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, _probe)
            test_result = {
                "status_code": response.status_code,
                "success": response.status_code == 200,
                "response": response.json(),
            }
        except Exception as e:
            # requests error messages can embed the full request URL, which
            # contains the API key; strip it before it reaches the page.
            # Assumes the key is URL-safe so it appears verbatim in the URL.
            test_result = {
                "status_code": "N/A",
                "success": False,
                "error": str(e).replace(api_key, "[REDACTED]"),
            }

    return templates.TemplateResponse("debug.html", {
        "request": request,
        "debug_info": debug_info,
        "test_result": test_result,
    })


@app.post("/save_annotations", response_class=HTMLResponse)
async def save_annotations(
request: Request,
Expand Down
153 changes: 129 additions & 24 deletions src/search_images.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,174 @@
import html
import json
import logging
import re

import requests

logger = logging.getLogger(__name__)


def search_images(query, api_key, search_engine_id, num_results=10):
    """Return image URLs for *query*, trying Google first and Bing second.

    Google Custom Search is queried with the supplied credentials. If it
    raises, or returns no images, Bing Images is tried instead (no API key
    is passed to it).

    Args:
        query: Search text.
        api_key: Google API key forwarded to the Google search helper.
        search_engine_id: Google search engine ID (CX) forwarded likewise.
        num_results: Number of image URLs requested from each source.

    Returns:
        The non-empty list of image URLs from the first source that
        produced any.

    Raises:
        Exception: When neither source yields an image. If Google raised,
            the message carries its error text and the original exception
            is attached as ``__cause__``.
    """
    google_failure = None

    # Try Google Custom Search first
    try:
        images = _search_google_custom_search(query, api_key, search_engine_id, num_results)
    except Exception as exc:
        google_failure = exc
        logger.warning("Google Custom Search failed: %s", exc)
    else:
        if images:
            logger.info(
                "Successfully retrieved %d images from Google Custom Search",
                len(images),
            )
            return images

    # Fallback to Bing Images (free, no API key needed)
    try:
        logger.info("Falling back to Bing Images for search")
        images = _search_bing_images(query, num_results)
    except Exception as exc:
        logger.error("Bing Images fallback also failed: %s", exc)
    else:
        if images:
            logger.info(
                "Successfully retrieved %d images from Bing Images",
                len(images),
            )
            return images

    # Both sources came up empty: report the Google failure when there was
    # one, chaining it so its traceback is not lost.
    if google_failure is not None:
        raise Exception(
            f"Unable to search for images. Google API error: {google_failure}\n\n"
            f"The app attempted to use a fallback image source (Bing Images) but it also failed. "
            f"Please check your internet connection and try again."
        ) from google_failure
    raise Exception("No image search service is available")


def _search_google_custom_search(query, api_key, search_engine_id, num_results=10):
    """Collect image URLs for *query* from the Google Custom Search JSON API.

    Results are fetched page by page until enough URLs are gathered or the
    API runs out of results.

    Args:
        query: Search text; it is URL-encoded by requests.
        api_key: Google API key.
        search_engine_id: Custom search engine ID (CX).
        num_results: Number of image URLs wanted.

    Returns:
        A list of image URLs (each item's ``link`` field); possibly shorter
        than *num_results*, or empty.

    Raises:
        Exception: On a network failure, a non-200 response, or a body that
            is not valid JSON.
    """
    endpoint = "https://www.googleapis.com/customsearch/v1"
    # Google Custom Search allows a maximum of 10 results per page
    results_per_page = 10
    # The API serves at most 100 results per query; asking past that point
    # returns an error, which would throw away the pages already fetched.
    max_total_results = 100
    wanted = min(num_results, max_total_results)

    images = []
    start_index = 1

    while len(images) < wanted:
        params = {
            "q": query,
            "searchType": "image",
            "key": api_key,
            "cx": search_engine_id,
            "start": start_index,
            "num": min(results_per_page, wanted - len(images)),
        }

        try:
            response = requests.get(endpoint, params=params, timeout=10)
        except requests.exceptions.RequestException as e:
            # requests messages can include the request URL, and with it the
            # API key; redact it because this text is surfaced to the caller.
            detail = str(e)
            if api_key:
                detail = detail.replace(str(api_key), "[REDACTED]")
            raise Exception(f"Network error while searching: {detail}") from e

        logger.debug("Google API Response Status: %s", response.status_code)

        if response.status_code != 200:
            raise Exception(_parse_google_api_error(response))

        try:
            data = response.json()
        except ValueError as e:
            raise Exception("Google Custom Search returned a non-JSON response") from e

        items = data.get('items') if isinstance(data, dict) else None
        if not items:
            break  # No more results

        images.extend(item['link'] for item in items)

        # Move to the next page; a short page means the results are exhausted.
        start_index += results_per_page
        if len(items) < results_per_page:
            break

    return images


def _parse_api_error(response):
def _search_bing_images(query, num_results=10):
    """Scrape image URLs for *query* from the Bing Images results page.

    No API key is used: the HTML results page is fetched and the
    ``data-src`` attribute of each ``<img>`` tag is extracted.

    Args:
        query: Search text.
        num_results: Maximum number of URLs to return. The ``count`` request
            parameter is capped at 35.

    Returns:
        A non-empty list of at most *num_results* distinct image URLs, in
        page order.

    Raises:
        Exception: On a non-200 response, when the page has no ``data-src``
            images, or when none of them is an ``http`` URL. Errors raised
            by ``requests`` propagate unchanged. Every failure is logged
            before being re-raised.
    """
    # Bing Images search URL
    search_url = "https://www.bing.com/images/search"
    params = {
        "q": query,
        "count": min(num_results, 35),
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        response = requests.get(search_url, params=params, headers=headers, timeout=10)
        logger.debug("Bing Images Response Status: %s", response.status_code)

        if response.status_code != 200:
            raise Exception(f"Bing Images returned status {response.status_code}")

        # Bing stores lazy-loaded images in data-src attributes
        matches = re.findall(r'<img[^>]+data-src="([^"]+)"', response.text)
        if not matches:
            raise Exception("No images found on Bing Images")

        images = []
        seen = set()
        for raw_url in matches:
            if len(images) >= num_results:
                break
            if not raw_url.startswith('http'):
                continue
            # Attribute values are HTML-escaped; decode every entity, not
            # just &amp;, then undo JSON-style escaped slashes.
            url = html.unescape(raw_url).replace('\\/', '/')
            if url in seen:
                continue  # the same thumbnail can appear more than once
            seen.add(url)
            images.append(url)

        if not images:
            raise Exception("No valid image URLs found")

        logger.info("Bing Images search returned %d images for query: %s", len(images), query)
        return images

    except Exception as e:
        logger.error("Bing Images error: %s", e)
        raise


def _parse_google_api_error(response):
    """Turn a failed Google API response into a readable message.

    Args:
        response: Object exposing ``json()`` and ``status_code``.

    Returns:
        A message chosen from the ``error`` object in the JSON body: its
        ``status`` or ``code`` selects the wording and its ``message`` is
        embedded. When the body is not JSON, is not a JSON object, or has
        no ``error`` key, a generic message with the HTTP status code is
        returned instead.
    """
    try:
        data = response.json()
    except ValueError:
        # Body is not JSON; fall through to the generic message.
        data = None

    if isinstance(data, dict) and 'error' in data:
        error_obj = data['error']

        # Handle different error formats
        if not isinstance(error_obj, dict):
            return f"API Error: {str(error_obj)}"

        message = error_obj.get('message', 'Unknown error')
        code = error_obj.get('code', response.status_code)
        status = error_obj.get('status', 'UNKNOWN')

        # Map common errors to user-friendly messages
        if status == 'PERMISSION_DENIED' or code == 403:
            return (
                f"Google Custom Search API Access Denied (403): {message}\n\n"
                f"This usually means:\n"
                f"• The Custom Search JSON API is not enabled in your Google Cloud project\n"
                f"• Your API key doesn't have the right permissions\n"
                f"• The search engine ID (CX) is incorrect or disabled\n\n"
                f"The app will use Bing Images as a fallback image source."
            )
        if status == 'INVALID_ARGUMENT' or code == 400:
            return f"Invalid Request: {message}"
        if status == 'UNAUTHENTICATED' or code == 401:
            return f"Authentication Failed: {message}"
        if status == 'RESOURCE_EXHAUSTED' or code == 429:
            return f"Rate Limited: {message} - Daily quota exceeded"
        return f"API Error ({code}): {message}"

    # Fallback error message
    return f"Google API failed with status {response.status_code}. Using Bing Images as fallback."
21 changes: 13 additions & 8 deletions src/templates/annotate.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,19 @@
}

/* Centered three-column grid that holds the per-image canvas cards. */
.annotation-container {
    display: grid;
    grid-template-columns: repeat(3, 1fr);
    gap: 20px;
    margin: 0 auto;
    max-width: 1400px;
}

/* Card framing a single canvas; spacing between cards comes from the
   parent grid's gap, so the card itself carries no bottom margin. */
.canvas-wrapper {
    background: #f8f9fa;
    border-radius: 12px;
    padding: 15px;
    text-align: center;
    margin-bottom: 0;
}

.canvas-wrapper h3 {
Expand All @@ -40,6 +43,8 @@
border-radius: 8px;
cursor: crosshair;
max-width: 100%;
display: block;
margin: 0 auto;
}

.annotation-tools {
Expand Down Expand Up @@ -114,19 +119,19 @@ <h4>How to annotate:</h4>
<form action="/save_annotations" method="post" id="annotation-form">
<input type="hidden" name="original_query" value="{{ query }}">

{% for local_image_path, image_filename in images %}
<div class="annotation-container">
<div class="annotation-container" style="margin-top: 30px;">
{% for local_image_path, image_filename in images %}
<div class="canvas-wrapper">
<h3>Image {{ loop.index }}: {{ image_filename }}</h3>
<canvas id="canvas_{{ loop.index0 }}" width="500" height="400"></canvas>
<h3>Image {{ loop.index }}</h3>
<canvas id="canvas_{{ loop.index0 }}" width="300" height="300"></canvas>
<input type="hidden" name="image_urls" value="{{ local_image_path }}">
<input type="hidden" id="annotation_{{ loop.index0 }}" name="annotations">
<div style="margin-top: 10px;">
<button type="button" class="tool-btn" onclick="clearCanvas({{ loop.index0 }})">Clear</button>
</div>
</div>
{% endfor %}
</div>
{% endfor %}

<div style="text-align: center; margin-top: 30px;">
<button type="submit" class="btn btn-success">Train Model</button>
Expand Down
Loading
Loading