diff --git a/octogen/ai/engine.py b/octogen/ai/engine.py index 1fd28b7..14e6098 100644 --- a/octogen/ai/engine.py +++ b/octogen/ai/engine.py @@ -4,6 +4,7 @@ import json from json_repair import repair_json import logging +import math import os import random import re @@ -146,10 +147,10 @@ def analyze_listening_profile(self, favorited_songs: List[Dict], top_artists: Li # Diversity score: higher when more evenly distributed if total > 0: - # Calculate normalized entropy - entropy = sum(-(count/total) * (count/total).bit_length() for count in artist_counts.values() if count > 0) - max_entropy = total.bit_length() if total > 1 else 1 - profile["diversity_score"] = entropy / max_entropy if max_entropy > 0 else 0 + # Calculate normalized Shannon entropy + entropy = -sum((count/total) * math.log2(count/total) for count in artist_counts.values() if count > 0) + max_entropy = math.log2(len(artist_counts)) if len(artist_counts) > 1 else 1 + profile["diversity_score"] = min(entropy / max_entropy, 1.0) if max_entropy > 0 else 0 profile["artist_distribution"] = dict(artist_counts.most_common(10)) @@ -267,6 +268,59 @@ def _invalidate_cache(self) -> None: # Note: We don't delete call tracker to preserve daily limit logger.info("Cache invalidation complete") + def _load_recent_songs(self) -> set: + """Load recently recommended songs from disk. + + Returns: + Set of "artist - title" strings from recent runs, or empty set on failure. + """ + recent_file = self.data_dir / "recent_playlist_songs.json" + try: + if recent_file.exists(): + with open(recent_file, 'r', encoding='utf-8') as f: + data = json.load(f) + if isinstance(data, list): + return set(data) + except Exception as e: + logger.warning("Could not load recent songs: %s", str(e)[:100]) + return set() + + def _save_recent_songs(self, songs: list) -> None: + """Save recently recommended songs to disk (capped at 200 entries across last 2 runs). + + The file is written atomically (temp file + os.replace) so an interrupted + write never leaves a corrupt or empty file on disk. + + Args: + songs: List of song dicts with "artist" and "title" keys from the new playlists. + """ + recent_file = self.data_dir / "recent_playlist_songs.json" + try: + existing = self._load_recent_songs() + new_entries = [ + f"{s.get('artist', '')} - {s.get('title', '')}" + for s in songs + if s.get('artist') and s.get('title') + ] + # Build ordered list: existing first (oldest), new entries appended last + # so that truncation with [-200:] always keeps the most recent songs. + seen: set = set() + ordered: list = [] + for entry in list(existing) + new_entries: + if entry not in seen: + seen.add(entry) + ordered.append(entry) + # Cap at 200 entries (approximately 2 runs worth); drop oldest first + combined = ordered[-200:] + # Atomic write: write to a sibling temp file then replace + tmp_file = recent_file.with_suffix(".json.tmp") + with open(tmp_file, 'w', encoding='utf-8') as f: + json.dump(combined, f, ensure_ascii=False) + os.replace(tmp_file, recent_file) + logger.info("Saved %d recent songs to disk (%d total)", len(new_entries), len(combined)) + except Exception as e: + logger.warning("Could not save recent songs: %s", str(e)[:100]) + def _build_cached_context( self, top_artists: List[str], @@ -285,13 +339,14 @@ def _build_cached_context( Returns: Context string for AI """ - artist_list = ", ".join(top_artists[:10]) - genre_list = ", ".join(top_genres[:6]) + artist_list = ", ".join(random.sample(top_artists[:20], min(10, len(top_artists[:20])))) + genre_list = ", ".join(random.sample(top_genres[:12], min(6, len(top_genres[:12])))) - # Limit context for memory efficiency + # Randomly sample a subset for variety — avoids O(n) shuffle of the full library + k = min(self.max_context_songs, len(favorited_songs)) favorited_sample = [ f"{s.get('artist','')} - {s.get('title','')}" - for s in favorited_songs[: self.max_context_songs] + for s in random.sample(favorited_songs, k) ] favorited_context = "\n".join(favorited_sample) @@ -392,12 +447,18 @@ def _get_or_create_gemini_cache( logger.info("Cache created: %s (expires in 24 hours)", cached_content.name) return cached_content - def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[str, str]] = None) -> str: + def _build_task_prompt( + self, + top_genres: List[str], + time_context: Optional[Dict[str, str]] = None, + recent_songs: Optional[set] = None, + ) -> str: """Build the task-specific prompt with optional time-of-day awareness. Args: top_genres: List of top genres time_context: Optional time-of-day context from get_time_context() + recent_songs: Optional set of recently recommended "artist - title" strings to avoid Returns: Task prompt string @@ -411,7 +472,11 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[ f'{i+2}. "Daily Mix {i+1}" (30 songs, genre: {genre_name}): 25 library + 5 new' ) - variety_seed = random.randint(1000, 9999) + variety_seed = random.randint(100000, 999999) # 6-digit range reduces collision probability across runs + + # Pick a random decade bias hint for added variety + decade_hints = ["1970s", "1980s", "1990s", "2000s", "2010s", "2020s", "Mix of all eras"] + decade_hint = random.choice(decade_hints) # Add time-of-day context if provided time_guidance = "" @@ -424,6 +489,16 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[ Guidance: {time_context.get('guidance', '')} Apply this context when selecting NEW songs to match the current time of day. +""" + + # Add recently recommended songs section if provided + recent_songs_section = "" + if recent_songs: + sample_size = min(40, len(recent_songs)) + recent_sample = random.sample(sorted(recent_songs), sample_size) + recent_songs_section = f""" +RECENTLY RECOMMENDED (avoid repeating these): +{chr(10).join(recent_sample)} """ return f"""Generate exactly 11 playlists (Variety Seed: {variety_seed}): @@ -434,7 +509,8 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[ 9. "Workout Energy" (30 songs): 25 library + 5 new high-energy 10. "Focus Flow" (30 songs): 25 library + 5 new ambient/instrumental 11. "Drive Time" (30 songs): 25 library + 5 new upbeat -{time_guidance} +Decade focus: {decade_hint} — lean toward this era for new discoveries +{time_guidance}{recent_songs_section} Respond ONLY with a valid JSON array of objects, each with "artist" and "title" fields, using double quotes. {{ @@ -454,6 +530,9 @@ def _build_task_prompt(self, top_genres: List[str], time_context: Optional[Dict[ - ESCAPE ALL BACKSLASHES: Use \\\\ not \\ - If song title has backslash, use double backslash - Example: "AC\\\\DC" not "AC\\DC" +- Maximize variety: no artist should appear more than 2 times per playlist +- Each playlist MUST have a different set of songs - NO song should appear in more than one playlist +- Prioritize LESS POPULAR and DEEPER CUTS over well-known hits """ def _generate_with_gemini( @@ -483,7 +562,8 @@ def _generate_with_gemini( if time_context: logger.info(f"🕐 Time context: {time_context.get('description')} - {time_context.get('mood')}") - prompt = self._build_task_prompt(top_genres, time_context) + recent_songs = self._load_recent_songs() + prompt = self._build_task_prompt(top_genres, time_context, recent_songs) # Set thinking budget thinking_budget = 5000 @@ -515,6 +595,19 @@ def _generate_with_gemini( logger.warning("Thinking budget nearly exhausted (%d/%d tokens)", thoughts, thinking_budget) + # Check for empty response + if not response.text or response.text.strip() == "": + logger.error("Gemini returned empty response") + raise ValueError("Empty response from Gemini") + + # Validate JSON structure + try: + json.loads(response.text) + except json.JSONDecodeError as e: + logger.error(f"Gemini returned invalid JSON: {e}") + logger.debug(f"Problematic response start: {response.text[:500]}") + raise ValueError("Invalid JSON response from Gemini") from e + return response.text def _generate_with_openai( @@ -544,7 +637,8 @@ def _generate_with_openai( if time_context: logger.info(f"🕐 Time context: {time_context.get('description')} - {time_context.get('mood')}") - task_prompt = self._build_task_prompt(top_genres, time_context) + recent_songs = self._load_recent_songs() + task_prompt = self._build_task_prompt(top_genres, time_context, recent_songs) full_prompt = f"{cached_context}\n\n{task_prompt}" response = self.client.chat.completions.create( @@ -714,6 +808,11 @@ def generate_all_playlists( self._record_ai_call() total = sum(len(songs) for songs in all_playlists.values()) logger.info("Generated %d playlists (%d songs)", len(all_playlists), total) + + # Persist all new songs for cross-run deduplication + all_new_songs = [song for songs in all_playlists.values() for song in songs] + self._save_recent_songs(all_new_songs) + return all_playlists, None def _generate_with_retry(self, generate_func, *args, **kwargs) -> str: diff --git a/octogen/main.py b/octogen/main.py index 50daaab..7b4a5a0 100644 --- a/octogen/main.py +++ b/octogen/main.py @@ -8,9 +8,9 @@ import os import json import logging +import random import time import argparse -import asyncio import re from datetime import datetime, timedelta, timezone from pathlib import Path @@ -34,7 +34,6 @@ from octogen.api.listenbrainz import ListenBrainzAPI from octogen.api.audiomuse import AudioMuseClient from octogen.ai.engine import AIRecommendationEngine -from octogen.config import load_config_from_env from octogen.models.tracker import ServiceTracker, RunTracker from octogen.web.health import write_health_status from octogen.scheduler.cron import calculate_next_run, wait_until, calculate_cron_interval @@ -466,7 +465,7 @@ def _record_successful_run(self) -> None: 'last_run_timestamp': now.isoformat(), 'last_run_date': now.strftime("%Y-%m-%d"), 'last_run_formatted': now.strftime("%Y-%m-%d %H:%M:%S"), - 'next_scheduled_run': next_scheduled_run, # ✅ Added this! + 'next_scheduled_run': next_scheduled_run, 'services': services_data }, f, indent=2) logger.info("✓ Recorded successful run timestamp with service tracking") @@ -598,7 +597,7 @@ def seen_key(a: str, t: str) -> Tuple[str, str]: artist = (rec.get("artist") or "").strip() title = (rec.get("title") or "").strip() - mbid = rec.get("mbid") # <-- NEW + mbid = rec.get("mbid") if not artist or not title: continue @@ -696,8 +695,6 @@ def seen_key(a: str, t: str) -> Tuple[str, str]: return song_ids[:max_songs] - - def create_playlist(self, name: str, recommendations: List[Dict], max_songs: int = 100) -> None: """Create a playlist from recommendations.""" @@ -739,6 +736,7 @@ def _generate_hybrid_daily_mix( List of song dicts: [{"artist": "...", "title": "..."}] """ songs = [] + label = f"Daily Mix {mix_number}" if mix_number in [1,2,3,4,5,6] else playlist_name # Get configuration audiomuse_songs_count = self.config["audiomuse"]["songs_per_mix"] @@ -747,7 +745,7 @@ def _generate_hybrid_daily_mix( # Get songs from AudioMuse-AI if enabled audiomuse_actual_count = 0 if self.audiomuse_client: - logger.debug(f"Requesting {audiomuse_songs_count} songs from AudioMuse-AI for Daily Mix {mix_number}") + logger.debug(f"Requesting {audiomuse_songs_count} songs from AudioMuse-AI for {label}") # --- Begin multi-version prompt logic --- modifiers = characteristics.split() if characteristics else [] prompt_variants = [] @@ -782,7 +780,6 @@ def _generate_hybrid_daily_mix( break songs.extend(audiomuse_collected) audiomuse_actual_count = len(audiomuse_collected) - label = f"Daily Mix {mix_number}" if mix_number in [1,2,3,4,5,6] else playlist_name logger.info(f"📻 {label}: Got {audiomuse_actual_count} songs from AudioMuse-AI") if audiomuse_actual_count < audiomuse_songs_count: logger.debug(f"AudioMuse returned fewer songs than requested ({audiomuse_actual_count}/{audiomuse_songs_count})") @@ -799,7 +796,7 @@ def _generate_hybrid_daily_mix( logger.info(f"🔄 AudioMuse returned {audiomuse_actual_count}/{audiomuse_songs_count} songs, " f"requesting {num_llm_songs} from LLM (includes {buffer} song buffer)") - logger.debug(f"Requesting {num_llm_songs} songs from LLM for Daily Mix {mix_number}") + logger.debug(f"Requesting {num_llm_songs} songs from LLM for {label}") # We'll use the AI engine to generate just the LLM portion llm_songs = self._generate_llm_songs_for_daily_mix( mix_number=mix_number, @@ -813,6 +810,7 @@ def _generate_hybrid_daily_mix( ) songs.extend(llm_songs) + random.shuffle(songs) logger.info(f"🤖 {label}: Got {len(llm_songs)} songs from LLM") logger.info(f"🎵 {label}: Total {len(songs)} songs (AudioMuse: {audiomuse_actual_count}, LLM: {len(llm_songs)})") @@ -841,15 +839,16 @@ def _generate_llm_songs_for_daily_mix( List of song dicts: [{"artist": "...", "title": "..."}] """ # Build a focused prompt for this specific daily mix - artist_list = ", ".join(top_artists[:10]) - genre_list = ", ".join(top_genres[:6]) + artist_list = ", ".join(random.sample(top_artists[:20], min(10, len(top_artists[:20])))) + genre_list = ", ".join(random.sample(top_genres[:12], min(6, len(top_genres[:12])))) - # Sample of favorited songs for context + # Randomly sample a 20-song context window — avoids O(n) shuffle of the full library + k = min(20, len(favorited_songs)) favorited_sample = [ f"{s.get('artist','')} - {s.get('title','')}" - for s in favorited_songs[:50] # Smaller sample for individual mix + for s in random.sample(favorited_songs, k) ] - favorited_context = "\n".join(favorited_sample[:20]) # Limit to 20 for focused prompt + favorited_context = "\n".join(favorited_sample) negative_context = "" if low_rated_songs: @@ -1073,69 +1072,69 @@ def run(self) -> None: sys.exit(1) if should_generate_regular and all_playlists: - # Handle hybrid playlists if AudioMuse is enabled - if self.audiomuse_client: - logger.info("=" * 70) - logger.info("GENERATING HYBRID PLAYLISTS (AudioMuse + LLM)") - logger.info("=" * 70) - - playlists_before_audiomuse = self.stats["playlists_created"] - - # Define all hybrid playlist configurations (everything except Discovery) - hybrid_playlist_configs = [ - # Daily Mixes (num 1-6) - {"name": "Daily Mix 1", "genre": top_genres[0] if len(top_genres) > 0 else DEFAULT_DAILY_MIX_GENRES[0], "characteristics": "energetic", "num": 1}, - {"name": "Daily Mix 2", "genre": top_genres[1] if len(top_genres) > 1 else DEFAULT_DAILY_MIX_GENRES[1], "characteristics": "catchy upbeat", "num": 2}, - {"name": "Daily Mix 3", "genre": top_genres[2] if len(top_genres) > 2 else DEFAULT_DAILY_MIX_GENRES[2], "characteristics": "danceable rhythmic", "num": 3}, - {"name": "Daily Mix 4", "genre": top_genres[3] if len(top_genres) > 3 else DEFAULT_DAILY_MIX_GENRES[3], "characteristics": "rhythmic bass-heavy", "num": 4}, - {"name": "Daily Mix 5", "genre": top_genres[4] if len(top_genres) > 4 else DEFAULT_DAILY_MIX_GENRES[4], "characteristics": "alternative atmospheric", "num": 5}, - {"name": "Daily Mix 6", "genre": top_genres[5] if len(top_genres) > 5 else DEFAULT_DAILY_MIX_GENRES[5], "characteristics": "smooth melodic", "num": 6}, - # Mood/Activity playlists (no num) - {"name": "Chill Vibes", "genre": "ambient", "characteristics": "relaxing calm peaceful", "num": None}, - {"name": "Workout Energy", "genre": "high-energy", "characteristics": "upbeat motivating intense", "num": None}, - {"name": "Focus Flow", "genre": "instrumental", "characteristics": "ambient atmospheric concentration", "num": None}, - {"name": "Drive Time", "genre": "upbeat", "characteristics": "driving energetic feel-good", "num": None} - ] - - # Generate and create hybrid playlists - for mix_config in hybrid_playlist_configs: - playlist_name = mix_config["name"] - mix_number = mix_config.get("num") - hybrid_songs = self._generate_hybrid_daily_mix( - mix_number=mix_number, - genre_focus=mix_config["genre"], - characteristics=mix_config["characteristics"], - top_artists=top_artists, - top_genres=top_genres, - favorited_songs=favorited_songs, - low_rated_songs=low_rated_songs, - playlist_name=playlist_name - ) - if hybrid_songs: - self.create_playlist(playlist_name, hybrid_songs, max_songs=30) - - # Track AudioMuse service - audiomuse_playlists = self.stats["playlists_created"] - playlists_before_audiomuse - self.service_tracker.record( - "audiomuse", - success=True, - playlists=audiomuse_playlists + # Handle hybrid playlists if AudioMuse is enabled + if self.audiomuse_client: + logger.info("=" * 70) + logger.info("GENERATING HYBRID PLAYLISTS (AudioMuse + LLM)") + logger.info("=" * 70) + + playlists_before_audiomuse = self.stats["playlists_created"] + + # Define all hybrid playlist configurations (everything except Discovery) + hybrid_playlist_configs = [ + # Daily Mixes (num 1-6) + {"name": "Daily Mix 1", "genre": top_genres[0] if len(top_genres) > 0 else DEFAULT_DAILY_MIX_GENRES[0], "characteristics": "energetic", "num": 1}, + {"name": "Daily Mix 2", "genre": top_genres[1] if len(top_genres) > 1 else DEFAULT_DAILY_MIX_GENRES[1], "characteristics": "catchy upbeat", "num": 2}, + {"name": "Daily Mix 3", "genre": top_genres[2] if len(top_genres) > 2 else DEFAULT_DAILY_MIX_GENRES[2], "characteristics": "danceable rhythmic", "num": 3}, + {"name": "Daily Mix 4", "genre": top_genres[3] if len(top_genres) > 3 else DEFAULT_DAILY_MIX_GENRES[3], "characteristics": "rhythmic bass-heavy", "num": 4}, + {"name": "Daily Mix 5", "genre": top_genres[4] if len(top_genres) > 4 else DEFAULT_DAILY_MIX_GENRES[4], "characteristics": "alternative atmospheric", "num": 5}, + {"name": "Daily Mix 6", "genre": top_genres[5] if len(top_genres) > 5 else DEFAULT_DAILY_MIX_GENRES[5], "characteristics": "smooth melodic", "num": 6}, + # Mood/Activity playlists (no num) + {"name": "Chill Vibes", "genre": "ambient", "characteristics": "relaxing calm peaceful", "num": None}, + {"name": "Workout Energy", "genre": "high-energy", "characteristics": "upbeat motivating intense", "num": None}, + {"name": "Focus Flow", "genre": "instrumental", "characteristics": "ambient atmospheric concentration", "num": None}, + {"name": "Drive Time", "genre": "upbeat", "characteristics": "driving energetic feel-good", "num": None} + ] + + # Generate and create hybrid playlists + for mix_config in hybrid_playlist_configs: + playlist_name = mix_config["name"] + mix_number = mix_config.get("num") + hybrid_songs = self._generate_hybrid_daily_mix( + mix_number=mix_number, + genre_focus=mix_config["genre"], + characteristics=mix_config["characteristics"], + top_artists=top_artists, + top_genres=top_genres, + favorited_songs=favorited_songs, + low_rated_songs=low_rated_songs, + playlist_name=playlist_name ) - logger.info("AudioMuse-AI service succeeded: %d playlists", audiomuse_playlists) - - # Create Discovery from AI response (LLM-only for new discoveries) - if "Discovery" in all_playlists: - discovery_songs = all_playlists["Discovery"] - if isinstance(discovery_songs, list) and discovery_songs: - logger.info("=" * 70) - logger.info("DISCOVERY (LLM-only for new discoveries)") - logger.info("=" * 70) - self.create_playlist("Discovery", discovery_songs, max_songs=50) - else: - # Original behavior: use all AI-generated playlists - for playlist_name, songs in all_playlists.items(): - if isinstance(songs, list) and songs: - self.create_playlist(playlist_name, songs, max_songs=100) + if hybrid_songs: + self.create_playlist(playlist_name, hybrid_songs, max_songs=30) + + # Track AudioMuse service + audiomuse_playlists = self.stats["playlists_created"] - playlists_before_audiomuse + self.service_tracker.record( + "audiomuse", + success=True, + playlists=audiomuse_playlists + ) + logger.info("AudioMuse-AI service succeeded: %d playlists", audiomuse_playlists) + + # Create Discovery from AI response (LLM-only for new discoveries) + if "Discovery" in all_playlists: + discovery_songs = all_playlists["Discovery"] + if isinstance(discovery_songs, list) and discovery_songs: + logger.info("=" * 70) + logger.info("DISCOVERY (LLM-only for new discoveries)") + logger.info("=" * 70) + self.create_playlist("Discovery", discovery_songs, max_songs=50) + else: + # Original behavior: use all AI-generated playlists + for playlist_name, songs in all_playlists.items(): + if isinstance(songs, list) and songs: + self.create_playlist(playlist_name, songs, max_songs=100) # External services (run regardless of starred songs, but only if should_generate_regular) @@ -1324,7 +1323,7 @@ def run(self) -> None: try: if favorited_songs: # Use random seed from favorited songs - seed_song = favorited_songs[len(favorited_songs) // 2] + seed_song = random.choice(favorited_songs) # Build a natural language request for AudioMuse mood = time_context.get("mood", "") request_text = f"{mood} music similar to {seed_song.get('title', '')} by {seed_song.get('artist', '')}" @@ -1393,7 +1392,6 @@ def run(self) -> None: llm_response = response.choices[0].message.content # Parse response - import json llm_data = json.loads(llm_response) llm_songs = llm_data.get("songs", []) @@ -1502,10 +1500,9 @@ def run(self) -> None: self._record_successful_run() except Exception as e: - write_health_status(BASE_DIR, "unhealthy", f"Error: {str(e)[:200]}") - logger.error("Fatal error: %s", e, exc_info=True) - sys.exit(1) - + write_health_status(BASE_DIR, "unhealthy", f"Error: {str(e)[:200]}") + logger.error("Fatal error: %s", e, exc_info=True) + sys.exit(1) # ============================================================================