diff --git a/.env b/.env index e92f520..923baa6 100644 --- a/.env +++ b/.env @@ -3,3 +3,8 @@ VIDEO_FILENAME=video1.strm WORKERS=4 # How many workers to use for scanning VIDEO_START_TIME=10 # Start at 10 seconds to avoid ads TMDB_API_KEY=your_tmdb_api_key + +# Optional: Auto-mapping for media libraries without {imdb-...} folder names +# Comma-separated list of media directories to scan +# MEDIA_DIRS=/mnt/plex/Movies,/mnt/plex/TV Shows,/mnt/plex/Anime +# MAPPING_FILE=media_mapping.json diff --git a/README.md b/README.md index d3ca25e..6bf8f40 100644 --- a/README.md +++ b/README.md @@ -63,10 +63,13 @@ WORKERS=4 VIDEO_START_TIME=10 ``` -- `SCAN_PATH`: Directory to scan for IMDb IDs +- `SCAN_PATH`: Directory to scan for IMDb IDs (used with `{imdb-ttXXXXXXX}` folder naming) - `VIDEO_FILENAME`: Name of the .strm file to update - `WORKERS`: Amount of workers to use - `VIDEO_START_TIME`: Start time in seconds for the video (default: 10) +- `TMDB_API_KEY`: Your TMDB API key (required for auto-mapping, get one free at [themoviedb.org](https://www.themoviedb.org/settings/api)) +- `MEDIA_DIRS`: *(Optional)* Comma-separated list of media directories for auto-mapping (see below) +- `MAPPING_FILE`: *(Optional)* Path to the mapping file (default: `media_mapping.json`) ### 3. Build and Run with Docker @@ -123,6 +126,54 @@ This mode will: - Skip movies without trailers - Run in the background +## Auto-Mapping (no folder renaming required) + +If your media folders use a standard `Title (Year)` naming convention (e.g., `Avatar (2009)`) instead of the `{imdb-ttXXXXXXX}` format, you can use the **auto-mapping** feature. It uses the TMDB API to automatically resolve IMDb IDs from your folder names. + +### Setup + +1. Get a free TMDB API key at [themoviedb.org](https://www.themoviedb.org/settings/api) +2. 
Add these variables to your `.env`: + +```env +TMDB_API_KEY=your_tmdb_api_key +MEDIA_DIRS=/mnt/plex/Movies,/mnt/plex/TV Shows,/mnt/plex/Anime +``` + +The `MEDIA_DIRS` variable accepts a comma-separated list of your media library root directories. Trailerfin will automatically detect the media type (movie/tv) from the directory name. + +### How it works + +- In `--monitor` mode, on startup (and every ~1 hour), Trailerfin scans each directory in `MEDIA_DIRS` +- For each subfolder, it parses the title and year from the folder name +- It queries the TMDB API to find the matching title, then resolves the IMDb ID +- Results are cached in `media_mapping.json` so TMDB is only queried once per folder +- Folders that already use the `{imdb-...}` naming convention are skipped + +### Manual mapping build + +You can build or update the mapping manually: + +```sh +python trailerfin.py --build-mapping +``` + +### Editing the mapping + +The `media_mapping.json` file can be manually edited to fix incorrect matches: + +```json +{ + "/mnt/plex/Movies/Avatar (2009)": { + "imdb_id": "tt0499549", + "title": "Avatar", + "year": 2009 + } +} +``` + +To skip a folder, set its `"imdb_id"` to `null` (unmatched folders are stored this way, together with `"not_found": true`), or update `"imdb_id"` to correct a wrong match. Delete an entry to have Trailerfin retry the lookup on next scan. 
+ ## Features in Detail ### Ignore List diff --git a/trailerfin.py b/trailerfin.py index 2e62e32..825d820 100644 --- a/trailerfin.py +++ b/trailerfin.py @@ -26,6 +26,10 @@ video_filename = os.getenv("VIDEO_FILENAME") schedule_days = int(os.getenv("SCHEDULE_DAYS", 1)) video_start_time = int(os.getenv("VIDEO_START_TIME", 10)) # Default to 10 seconds if not set +tmdb_api_key = os.getenv("TMDB_API_KEY") +media_dirs = [d.strip() for d in os.getenv("MEDIA_DIRS", "").split(",") if d.strip()] +mapping_file_path = Path(os.getenv("MAPPING_FILE", "media_mapping.json")) + headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', @@ -42,6 +46,8 @@ except ValueError: default_worker_count = 4 +# --- TMDB helpers --- + def tmdb_to_imdb(tmdb_id): """Convert TMDB ID to IMDB ID using TMDB API. Try both as movie and TV show.""" if not tmdb_api_key: @@ -70,11 +76,165 @@ def tmdb_to_imdb(tmdb_id): logging.error(f"Error converting TMDB->IMDB: {e}") return None +# --- Media mapping --- + +def load_media_mapping(): + """Load the media path -> imdb_id mapping from file""" + try: + if mapping_file_path.exists(): + with open(mapping_file_path, 'r') as f: + return json.load(f) + except Exception as e: + logging.error(f"Error loading media mapping: {e}") + return {} + +def save_media_mapping(mapping): + """Save the media mapping to file""" + try: + mapping_file_path.parent.mkdir(parents=True, exist_ok=True) + with open(mapping_file_path, 'w') as f: + json.dump(mapping, f, indent=2, ensure_ascii=False) + except Exception as e: + logging.error(f"Error saving media mapping: {e}") + +def parse_folder_name(folder_name): + """Extract title and year from 'Title (Year)' format""" + match = re.match(r'^(.+?)\s*\((\d{4})\)', folder_name) + if match: + return match.group(1).strip(), int(match.group(2)) + return folder_name.strip(), None 
+ +def detect_media_type(dir_path): + """Guess movie vs tv from the directory name""" + lower = dir_path.lower() + if any(kw in lower for kw in ('film', 'movie', 'pelicula', 'filme')): + return "movie" + if any(kw in lower for kw in ('serie', 'tv', 'show')): + return "tv" + if 'anime' in lower: + if 'film' in lower or 'movie' in lower: + return "movie" + if 'serie' in lower: + return "tv" + return None # will try both + +def search_tmdb_for_imdb_id(title, year=None, media_type=None): + """Search TMDB by title+year, return IMDb ID""" + if not tmdb_api_key: + logging.error("TMDB_API_KEY not set") + return None + + types_to_try = [media_type] if media_type else ["movie", "tv"] + + for mtype in types_to_try: + try: + if mtype == "movie": + url = "https://api.themoviedb.org/3/search/movie" + params = {"api_key": tmdb_api_key, "query": title} + if year: + params["year"] = year + else: + url = "https://api.themoviedb.org/3/search/tv" + params = {"api_key": tmdb_api_key, "query": title} + if year: + params["first_air_date_year"] = year + + response = requests.get(url, params=params, timeout=10) + if response.status_code != 200: + continue + + results = response.json().get("results", []) + if not results: + continue + + tmdb_id = results[0]["id"] + imdb_id = tmdb_to_imdb(tmdb_id) + if imdb_id: + return imdb_id + except Exception as e: + logging.error(f"Error searching TMDB for '{title}': {e}") + + return None + +def build_media_mapping(scan_paths=None): + """Scan media directories, query TMDB, build path->imdb_id mapping""" + mapping = load_media_mapping() + paths = scan_paths if scan_paths else media_dirs + + if not paths: + logging.error("No media directories configured (MEDIA_DIRS)") + return mapping + + new_entries = 0 + for media_dir in paths: + if not os.path.exists(media_dir): + logging.warning(f"Media directory does not exist: {media_dir}") + continue + + media_type = detect_media_type(media_dir) + logging.info(f"Scanning {media_dir} (type: {media_type or 'auto'})") 
+ + for entry in sorted(os.listdir(media_dir)): + full_path = os.path.join(media_dir, entry) + if not os.path.isdir(full_path): + continue + + # Skip already mapped entries (even not_found ones) + if full_path in mapping: + continue + + # Skip folders that already use {imdb-...} naming + if re.search(r'\{imdb-tt\d+\}', entry): + continue + + title, year = parse_folder_name(entry) + logging.info(f"Looking up: {title} ({year or '?'}) [{media_type or 'auto'}]") + + imdb_id = search_tmdb_for_imdb_id(title, year, media_type) + if imdb_id: + mapping[full_path] = {"imdb_id": imdb_id, "title": title, "year": year} + new_entries += 1 + logging.info(f" -> Found: {imdb_id}") + else: + mapping[full_path] = {"imdb_id": None, "title": title, "year": year, "not_found": True} + logging.warning(f" -> Not found on TMDB") + + time.sleep(0.3) # Rate limit TMDB API + + save_media_mapping(mapping) + logging.info(f"Mapping complete: {new_entries} new entries added, {len(mapping)} total") + return mapping + +def get_mapped_folders(mapping): + """Return list of (folder_path, imdb_id) from the mapping, excluding not_found entries""" + folders = [] + for path, info in mapping.items(): + imdb_id = info.get("imdb_id") if isinstance(info, dict) else info + if imdb_id and imdb_id.startswith("tt"): + folders.append((path, imdb_id)) + return folders + +def get_imdb_id_for_path(folder_path, mapping): + """Resolve IMDb ID for a folder path: check mapping first, then folder name pattern""" + # Check mapping + info = mapping.get(folder_path) + if info: + imdb_id = info.get("imdb_id") if isinstance(info, dict) else info + if imdb_id and imdb_id.startswith("tt"): + return imdb_id + # Fallback: check folder name pattern {imdb-ttXXXXX} + match = re.search(r'\{imdb-(tt\d+)\}', folder_path) + if match: + return match.group(1) + return None + +# --- IMDb trailer fetching --- + def get_trailer_video_page_url(imdb_id): def find_trailer_in_page(soup): trailer_spans = soup.find_all('span', 
class_='ipc-lockup-overlay__text ipc-lockup-overlay__text--clamp-none') logging.debug(f"Found {len(trailer_spans)} spans with video class") - + # First pass: look for trailers for span in trailer_spans: span_text = span.get_text(strip=True) @@ -85,7 +245,7 @@ def find_trailer_in_page(soup): video_page_url = f"https://www.imdb.com{parent_link['href']}" logging.debug(f"Found trailer link: {video_page_url}") return video_page_url - + # Second pass: look for clips if no trailer found for span in trailer_spans: span_text = span.get_text(strip=True) @@ -96,7 +256,7 @@ def find_trailer_in_page(soup): video_page_url = f"https://www.imdb.com{parent_link['href']}" logging.debug(f"Found clip link: {video_page_url}") return video_page_url - + return None try: @@ -121,7 +281,7 @@ def find_trailer_in_page(soup): # If still no trailer found, look for first video longer than 30 seconds video_links = soup.find_all('a', href=lambda x: x and '/video/vi' in x) logging.debug(f"Found {len(video_links)} video links") - + for link in video_links: # Get the duration from the parent div parent_div = link.find_parent('div', class_='video-item') @@ -181,6 +341,8 @@ def quality_key(item): logging.error(f"Error parsing playback URLs from JSON: {e}") return None +# --- .strm file management --- + def create_or_update_strm_file(folder_path, video_url): backdrops_path = os.path.join(folder_path, "backdrops") os.makedirs(backdrops_path, exist_ok=True) @@ -201,7 +363,6 @@ def is_strm_expired(strm_path): if not expires_list: return True expires = int(expires_list[0]) - import time now = int(time.time()) return now >= expires except Exception as e: @@ -220,6 +381,8 @@ def get_expiration_time(url): logging.error(f"Error parsing expiration time from URL: {e}") return None +# --- State persistence --- + def save_expiration_times(expiration_times): cache_file = Path("trailer_expirations.json") try: @@ -264,6 +427,8 @@ def format_duration(seconds): remaining_seconds = seconds % 60 return f"{minutes}min 
{remaining_seconds}sec" +# --- Core processing --- + def process_imdb_folder(root, imdb_id, expiration_times, ignored_titles): try: # Check if this title is in the ignore list @@ -273,17 +438,17 @@ def process_imdb_folder(root, imdb_id, expiration_times, ignored_titles): backdrops_path = os.path.join(root, "backdrops") strm_path = os.path.join(backdrops_path, video_filename) - + # Check if we need to refresh based on expiration time current_time = int(time.time()) expiration_time = expiration_times.get(strm_path) - + if expiration_time and current_time < expiration_time: time_until_expiry = expiration_time - current_time formatted_duration = format_duration(time_until_expiry) logging.info(f"Trailer link still valid for {imdb_id} in {root} (expires in {formatted_duration})") return - + logging.info(f"Refreshing trailer for {imdb_id} in {root}") video_page_url = get_trailer_video_page_url(imdb_id) if video_page_url: @@ -307,32 +472,49 @@ def process_imdb_folder(root, imdb_id, expiration_times, ignored_titles): except Exception as e: logging.error(f"Worker error for {imdb_id} in {root}: {e}") -def scan_and_refresh_trailers(scan_path=None, worker_count=4): +def collect_imdb_folders(scan_path=None): + """Collect (folder_path, imdb_id) pairs from both folder names and mapping""" path_to_scan = scan_path if scan_path else base_path - if not os.path.exists(path_to_scan): - logging.error(f"Provided path does not exist: {path_to_scan}") - return - + imdb_folders = [] + seen_paths = set() + + # 1) Collect from mapping file + mapping = load_media_mapping() + for folder_path, imdb_id in get_mapped_folders(mapping): + if os.path.isdir(folder_path): + imdb_folders.append((folder_path, imdb_id)) + seen_paths.add(folder_path) + + # 2) Collect from original {imdb-ttXXXX} folder name convention + if path_to_scan and os.path.exists(path_to_scan): + for root, dirs, files in os.walk(path_to_scan): + if root in seen_paths: + continue + match = re.search(r'\{imdb-(tt\d+)\}', root) + if 
match: + if not root.rstrip(os.sep).endswith(f'{{imdb-{match.group(1)}}}'): + continue + imdb_id = match.group(1) + imdb_folders.append((root, imdb_id)) + + return imdb_folders + +def scan_and_refresh_trailers(scan_path=None, worker_count=4): # Load existing expiration times and ignored titles expiration_times = load_expiration_times() ignored_titles = load_ignored_titles() - - imdb_folders = [] - for root, dirs, files in os.walk(path_to_scan): - match = re.search(r'\{imdb-(tt\d+)\}', root) - if match: - if not root.rstrip(os.sep).endswith(f'{{imdb-{match.group(1)}}}'): - continue - imdb_id = match.group(1) - imdb_folders.append((root, imdb_id)) - + + imdb_folders = collect_imdb_folders(scan_path) + if not imdb_folders: - logging.info("No IMDb folders found to process.") + logging.info("No media folders found to process.") return - + + logging.info(f"Found {len(imdb_folders)} media folders to process") + with ThreadPoolExecutor(max_workers=worker_count) as executor: future_to_folder = { - executor.submit(process_imdb_folder, root, imdb_id, expiration_times, ignored_titles): (root, imdb_id) + executor.submit(process_imdb_folder, root, imdb_id, expiration_times, ignored_titles): (root, imdb_id) for root, imdb_id in imdb_folders } for future in as_completed(future_to_folder): @@ -359,134 +541,149 @@ def check_expiring_links(expiration_times, scan_path=None, worker_count=4, ignor """Check for links that are about to expire and refresh them""" if ignored_titles is None: ignored_titles = load_ignored_titles() - + + mapping = load_media_mapping() current_time = int(time.time()) expiring_links = [] - + # Find links that will expire in the next hour for strm_path, expiration_time in expiration_times.items(): if expiration_time - current_time < 3600: # Less than 1 hour until expiration - # Extract IMDb ID from the path + # Extract media folder from strm_path (strip /backdrops/video.strm) root = os.path.dirname(os.path.dirname(strm_path)) - match = 
re.search(r'\{imdb-(tt\d+)\}', root) - if match: - imdb_id = match.group(1) - # Only include if not in ignored titles - if imdb_id not in ignored_titles: - expiring_links.append(strm_path) - + imdb_id = get_imdb_id_for_path(root, mapping) + if imdb_id and imdb_id not in ignored_titles: + expiring_links.append((root, imdb_id)) + if expiring_links: logging.info(f"Found {len(expiring_links)} links expiring soon") - # Extract IMDb IDs from the paths - imdb_folders = [] - for strm_path in expiring_links: - root = os.path.dirname(os.path.dirname(strm_path)) - match = re.search(r'\{imdb-(tt\d+)\}', root) - if match: - imdb_id = match.group(1) - imdb_folders.append((root, imdb_id)) - - if imdb_folders: - with ThreadPoolExecutor(max_workers=worker_count) as executor: - future_to_folder = { - executor.submit(process_imdb_folder, root, imdb_id, expiration_times, ignored_titles): (root, imdb_id) - for root, imdb_id in imdb_folders - } - for future in as_completed(future_to_folder): - root, imdb_id = future_to_folder[future] - try: - future.result() - except Exception as exc: - logging.error(f"Exception in worker for {imdb_id} in {root}: {exc}") + with ThreadPoolExecutor(max_workers=worker_count) as executor: + future_to_folder = { + executor.submit(process_imdb_folder, root, imdb_id, expiration_times, ignored_titles): (root, imdb_id) + for root, imdb_id in expiring_links + } + for future in as_completed(future_to_folder): + root, imdb_id = future_to_folder[future] + try: + future.result() + except Exception as exc: + logging.error(f"Exception in worker for {imdb_id} in {root}: {exc}") def initialize_expiration_database(scan_path=None): """Initialize the expiration database by scanning existing .strm files""" path_to_scan = scan_path if scan_path else base_path - if not os.path.exists(path_to_scan): - logging.error(f"Provided path does not exist: {path_to_scan}") - return {} - expiration_times = {} strm_files_found = False - - # First, try to find existing .strm files - for 
root, dirs, files in os.walk(path_to_scan): - if video_filename in files: - strm_path = os.path.join(root, video_filename) - try: - with open(strm_path, 'r') as f: - url = f.read().strip() - expiration_time = get_expiration_time(url) - if expiration_time: - expiration_times[strm_path] = expiration_time - strm_files_found = True - logging.info(f"Found existing .strm file: {strm_path}") - except Exception as e: - logging.error(f"Error reading .strm file {strm_path}: {e}") - + + # Scan all media dirs + scan_path for existing .strm files + paths_to_check = list(media_dirs) + if path_to_scan and os.path.exists(path_to_scan): + paths_to_check.append(path_to_scan) + + for check_path in paths_to_check: + if not os.path.exists(check_path): + continue + for root, dirs, files in os.walk(check_path): + if video_filename in files: + strm_path = os.path.join(root, video_filename) + try: + with open(strm_path, 'r') as f: + url = f.read().strip() + expiration_time = get_expiration_time(url) + if expiration_time: + expiration_times[strm_path] = expiration_time + strm_files_found = True + logging.info(f"Found existing .strm file: {strm_path}") + except Exception as e: + logging.error(f"Error reading .strm file {strm_path}: {e}") + if not strm_files_found: logging.info("No existing .strm files found, performing full scan") - # If no .strm files found, do a full scan scan_and_refresh_trailers(scan_path) - # Reload expiration times after full scan expiration_times = load_expiration_times() - + return expiration_times def watch_for_new_media(scan_path=None, worker_count=4): - """Watch for new media folders and process them""" + """Watch for media folders (both mapped and {imdb-...} named) with video files""" path_to_scan = scan_path if scan_path else base_path - if not os.path.exists(path_to_scan): - logging.error(f"Provided path does not exist: {path_to_scan}") - return set() - - # Get current folders current_folders = set() - for root, dirs, files in os.walk(path_to_scan): - match = 
re.search(r'\{imdb-(tt\d+)\}', root) - if match and root.rstrip(os.sep).endswith(f'{{imdb-{match.group(1)}}}'): - # Verify this is a media folder by checking for video files - has_video = any(f.lower().endswith(('.mp4', '.mkv', '.avi', '.mov', '.wmv')) for f in files) - if has_video: - current_folders.add(root) - logging.debug(f"Found media folder: {root}") - + mapping = load_media_mapping() + + # 1) Mapped folders with video files + for folder_path, imdb_id in get_mapped_folders(mapping): + if not os.path.isdir(folder_path): + continue + has_video = any( + f.lower().endswith(('.mp4', '.mkv', '.avi', '.mov', '.wmv')) + for f in os.listdir(folder_path) + if os.path.isfile(os.path.join(folder_path, f)) + ) + if has_video: + current_folders.add(folder_path) + + # 2) Original {imdb-...} folders with video files + if path_to_scan and os.path.exists(path_to_scan): + for root, dirs, files in os.walk(path_to_scan): + if root in current_folders: + continue + match = re.search(r'\{imdb-(tt\d+)\}', root) + if match and root.rstrip(os.sep).endswith(f'{{imdb-{match.group(1)}}}'): + has_video = any(f.lower().endswith(('.mp4', '.mkv', '.avi', '.mov', '.wmv')) for f in files) + if has_video: + current_folders.add(root) + logging.debug(f"Found media folder: {root}") + return current_folders def run_continuous_monitor(scan_path=None, worker_count=4): logging.info("Starting continuous monitor for expiring links") - + + # Build/update the media mapping on startup if MEDIA_DIRS is configured + if media_dirs: + logging.info("Building media mapping from MEDIA_DIRS...") + build_media_mapping() + # Initialize the database expiration_times = initialize_expiration_database(scan_path) save_expiration_times(expiration_times) - + # Load ignored titles ignored_titles = load_ignored_titles() - + # Get initial set of folders last_known_folders = watch_for_new_media(scan_path, worker_count) logging.info(f"Initial scan found {len(last_known_folders)} media folders") - + + mapping = 
load_media_mapping() + mapping_refresh_counter = 0 + while True: try: + # Periodically rebuild mapping to catch new media (every 12 cycles = ~1 hour) + mapping_refresh_counter += 1 + if media_dirs and mapping_refresh_counter >= 12: + mapping_refresh_counter = 0 + build_media_mapping() + mapping = load_media_mapping() + # Check for new media current_folders = watch_for_new_media(scan_path, worker_count) new_folders = current_folders - last_known_folders - + if new_folders: logging.info(f"Found {len(new_folders)} new media folders") for root in new_folders: - match = re.search(r'\{imdb-(tt\d+)\}', root) - if match: - imdb_id = match.group(1) + imdb_id = get_imdb_id_for_path(root, mapping) + if imdb_id: logging.info(f"Processing new media: {root}") process_imdb_folder(root, imdb_id, expiration_times, ignored_titles) last_known_folders = current_folders save_expiration_times(expiration_times) - + # Check for expiring links check_expiring_links(expiration_times, scan_path, worker_count, ignored_titles) - + # Sleep for 5 minutes before next check time.sleep(300) except KeyboardInterrupt: @@ -498,13 +695,16 @@ def run_continuous_monitor(scan_path=None, worker_count=4): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Scan and refresh IMDb trailers.") - parser.add_argument('--dir', type=str, help='Directory to scan (defaults to /mnt/plex)') - parser.add_argument('--schedule', action='store_true', help='Run as a weekly scheduled job') + parser.add_argument('--dir', type=str, help='Directory to scan (defaults to SCAN_PATH)') + parser.add_argument('--schedule', action='store_true', help='Run as a scheduled job') parser.add_argument('--workers', type=int, default=default_worker_count, help=f'Number of worker threads (default: {default_worker_count})') parser.add_argument('--monitor', action='store_true', help='Run in continuous monitoring mode') + parser.add_argument('--build-mapping', action='store_true', help='Build media mapping from MEDIA_DIRS using 
TMDB API, then exit') args = parser.parse_args() - - if args.monitor: + + if args.build_mapping: + build_media_mapping() + elif args.monitor: run_continuous_monitor(args.dir, args.workers) elif args.schedule: run_scheduler(args.dir, args.workers)