-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathoptimized_discord_bot.py
More file actions
5178 lines (4399 loc) · 237 KB
/
optimized_discord_bot.py
File metadata and controls
5178 lines (4399 loc) · 237 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
"""
Optimized Discord Bot with Ollama + POML Integration
- All performance optimizations (KV cache, flash attention, BPE)
- Complete tool suite (web search, scrape, calculate, etc.)
- Perfect Discord @ mention handling
- Anti-repetition system
- Dynamic model management
- POML (Prompt Orchestration Markup Language) support
"""
import discord
from discord.ext import commands
import asyncio
import aiohttp
import json
import os
import time
import re
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple
from conversation_memory import ConversationMemoryManager
import hashlib
import faiss
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer
# Try to import AI Intent Classifier (optional dependency)
try:
from ai_intent_classifier import AIIntentClassifier, IntentClassification
AI_CLASSIFIER_AVAILABLE = True
print("[OK] AI Intent Classifier import successful")
except ImportError as e:
AI_CLASSIFIER_AVAILABLE = False
print(f"[WARNING] AI Intent Classifier not available: {e}")
print("[INFO] Bot will use fallback mood system until torch/transformers are installed")
# Load environment variables from .env file
try:
from dotenv import load_dotenv
load_dotenv()
print("[OK] Environment variables loaded from .env file")
except ImportError:
print("[WARNING] python-dotenv not installed - install with: pip install python-dotenv")
# POML Integration (optional)
try:
from poml import poml
POML_AVAILABLE = True
print("[OK] POML available - Using correct core API")
except ImportError:
POML_AVAILABLE = False
print("[WARNING] POML not installed - Using basic prompts (pip install poml to enable)")
# =============================================================================
# POML TEMPLATE CACHING SYSTEM
# =============================================================================
class POMLCache:
    """Context-aware POML template cache to eliminate processing delays.

    Compiled POML results are keyed by (template name, coarse context bucket).
    Mood, memory size, and last-activity are deliberately bucketed coarsely so
    that similar requests share cache entries and the hit rate stays high.
    """

    def __init__(self):
        self.compiled_results = {}  # Cache compiled results, not raw templates
        self.template_hashes = {}   # template_name -> last context key cached
        self.cache_hits = 0
        self.cache_misses = 0
        self.max_cache_size = 100   # Prevent memory bloat

    def _generate_context_key(self, template_name: str, context: dict) -> str:
        """Generate a stable cache key from the context values that affect
        POML processing (template name, mood bucket, memory bucket, activity)."""
        context_parts = []
        # Include template name
        context_parts.append(f"template:{template_name}")
        # Group moods into five coarse buckets for better cache hits.
        if 'mood_points' in context:
            mood_points = context['mood_points']
            if mood_points <= -5:
                mood_group = "very_negative"
            elif mood_points <= -1:
                mood_group = "negative"
            elif mood_points <= 1:
                mood_group = "neutral"
            elif mood_points <= 5:
                mood_group = "positive"
            else:
                mood_group = "very_positive"
            context_parts.append(f"mood:{mood_group}")
        # Include memory context for better personalization.
        if 'user_memory' in context:
            memory = context['user_memory']
            # Memory block count, grouped for cache efficiency.
            block_count = memory.get('block_count', 0)
            if block_count == 0:
                memory_group = "no_memory"
            elif block_count <= 5:
                memory_group = "low_memory"
            elif block_count <= 15:
                memory_group = "medium_memory"
            else:
                memory_group = "high_memory"
            context_parts.append(f"memory:{memory_group}")
            # Last activity, grouped by time ranges.
            last_activity = memory.get('last_activity', 0)
            if last_activity == 0:
                activity_group = "new_user"
            elif time.time() - last_activity < 3600:   # within 1 hour
                activity_group = "recent"
            elif time.time() - last_activity < 86400:  # within 1 day
                activity_group = "daily"
            else:
                activity_group = "older"
            context_parts.append(f"activity:{activity_group}")
        # Tone is intentionally excluded: it changes too frequently and would
        # fragment the cache; the mood bucket captures most variation.
        context_str = "|".join(sorted(context_parts))
        context_key = hashlib.md5(context_str.encode()).hexdigest()[:12]
        return context_key

    def get_cached_result(self, template_name: str, context: dict):
        """Return the cached compiled result for this context, or None on miss."""
        context_key = self._generate_context_key(template_name, context)
        cache_key = f"{template_name}:{context_key}"
        # Debug: Show what we're looking for
        print(f"[POML CACHE DEBUG] Looking for key: {cache_key}")
        print(f"[POML CACHE DEBUG] Available keys: {list(self.compiled_results.keys())[:5]}...")
        if cache_key in self.compiled_results:
            self.cache_hits += 1
            print(f"[POML CACHE DEBUG] CACHE HIT! Found: {cache_key}")
            return self.compiled_results[cache_key]
        self.cache_misses += 1
        print(f"[POML CACHE DEBUG] CACHE MISS! Not found: {cache_key}")
        return None

    def cache_result(self, template_name: str, context: dict, result):
        """Cache a compiled result for this context combination."""
        context_key = self._generate_context_key(template_name, context)
        cache_key = f"{template_name}:{context_key}"
        # Debug: Show what we're caching
        print(f"[POML CACHE DEBUG] Caching result for key: {cache_key}")
        print(f"[POML CACHE DEBUG] Context: {context}")
        # Store the compiled result
        self.compiled_results[cache_key] = result
        # BUGFIX: record the template so is_template_loaded() can report it;
        # previously template_hashes was never written and always came back False.
        self.template_hashes[template_name] = context_key
        # Evict the 20 oldest entries (dicts preserve insertion order) if full.
        if len(self.compiled_results) > self.max_cache_size:
            oldest_keys = list(self.compiled_results.keys())[:20]
            for key in oldest_keys:
                del self.compiled_results[key]
        return True

    def is_template_loaded(self, template_name: str) -> bool:
        """Check whether at least one result for this template has been cached."""
        return template_name in self.template_hashes

    def get_cache_stats(self) -> dict:
        """Get cache performance statistics."""
        total_requests = self.cache_hits + self.cache_misses
        hit_rate = self.cache_hits / total_requests if total_requests > 0 else 0.0
        return {
            "cached_results": len(self.compiled_results),
            "cache_hits": self.cache_hits,
            "cache_misses": self.cache_misses,
            "hit_rate": hit_rate,
            "max_cache_size": self.max_cache_size
        }

    def clear_cache(self):
        """Clear all cached results and reset hit/miss counters."""
        self.compiled_results.clear()
        self.template_hashes.clear()
        self.cache_hits = 0
        self.cache_misses = 0
# =============================================================================
# ENVIRONMENT SETUP - OLLAMA OPTIMIZATIONS
# =============================================================================
# Apply the optimal Ollama environment variables for Q4_K_M quantized models.
os.environ.update({
    'OLLAMA_FLASH_ATTENTION': '1',    # enable flash attention
    'OLLAMA_KV_CACHE_TYPE': 'f16',    # full-precision KV cache (better quality on Q4)
    'OLLAMA_NUM_PARALLEL': '2',       # reduced parallelism for Q4 models
    'OLLAMA_MAX_LOADED_MODELS': '1',  # single loaded model for Q4 efficiency
})
# =============================================================================
# TOOL DEFINITIONS
# =============================================================================
# Tool schemas for Ollama (exact from merged bot)
# Ollama function-calling schema: current weather lookup by city name.
get_weather_tool = {
    'type': 'function',
    'function': {
        'name': 'get_weather',
        'description': 'Get current weather for a city.',
        'parameters': {
            'type': 'object',
            'properties': {
                'city': {'type': 'string', 'description': 'The city name'}
            },
            'required': ['city'],
        },
    },
}
# Ollama function-calling schema: safe math expression evaluation.
calculate_tool = {
    'type': 'function',
    'function': {
        'name': 'calculate',
        'description': 'Safely evaluate mathematical expressions.',
        'parameters': {
            'type': 'object',
            'properties': {
                'expression': {'type': 'string', 'description': 'Mathematical expression to evaluate'}
            },
            'required': ['expression'],
        },
    },
}
# Ollama function-calling schema: general web search (Serper-backed).
web_search_tool = {
    'type': 'function',
    'function': {
        'name': 'web_search',
        'description': 'Search the web for information.',
        'parameters': {
            'type': 'object',
            'properties': {
                'query': {'type': 'string', 'description': 'Search query'},
                'num_results': {'type': 'integer', 'description': 'Number of results (1-10)', 'default': 5}
            },
            'required': ['query'],
        },
    },
}
# Ollama function-calling schema: webpage content scraping.
web_scrape_tool = {
    'type': 'function',
    'function': {
        'name': 'web_scrape',
        'description': 'Scrape content from a webpage.',
        'parameters': {
            'type': 'object',
            'properties': {
                'url': {'type': 'string', 'description': 'URL to scrape'}
            },
            'required': ['url'],
        },
    },
}
# Ollama function-calling schema: recent news article search.
news_search_tool = {
    'type': 'function',
    'function': {
        'name': 'news_search',
        'description': 'Search for recent news articles.',
        'parameters': {
            'type': 'object',
            'properties': {
                'query': {'type': 'string', 'description': 'News search query'},
                'num_results': {'type': 'integer', 'description': 'Number of results (1-10)', 'default': 5}
            },
            'required': ['query'],
        },
    },
}
# Ollama function-calling schema: current date/time (takes no parameters).
get_time_tool = {
    'type': 'function',
    'function': {
        'name': 'get_time',
        'description': 'Get current date and time information including formatted time, date, timezone, and timestamp',
        'parameters': {
            'type': 'object',
            'properties': {},
            'required': []
        }
    }
}
# Ollama function-calling schema: Discord profile/activity analysis by user ID.
analyze_user_profile_tool = {
    "type": "function",
    "function": {
        "name": "analyze_user_profile",
        "description": "Get comprehensive Discord profile analysis including activity patterns and social connections",
        "parameters": {
            "type": "object",
            "properties": {
                "user_id": {
                    "type": "string",
                    "description": "Discord user ID to analyze"
                }
            },
            "required": ["user_id"]
        }
    }
}
# Ollama function-calling schema: deep profile analysis including avatar AI vision.
dox_user_tool = {
    "type": "function",
    "function": {
        "name": "dox_user",
        "description": "Comprehensive Discord user profile analysis including avatar AI vision analysis, roles, activities, and personality insights",
        "parameters": {
            "type": "object",
            "properties": {
                "user_id": {
                    "type": "string",
                    "description": "Discord user ID to analyze"
                }
            },
            "required": ["user_id"]
        }
    }
}
# Ollama function-calling schema: generic AI vision analysis of an image URL.
analyze_image_tool_schema = {
    "type": "function",
    "function": {
        "name": "analyze_image_tool",
        "description": "Analyze any image using AI vision to identify content, style, mood, colors, and context",
        "parameters": {
            "type": "object",
            "properties": {
                "image_url": {
                    "type": "string",
                    "description": "URL of the image to analyze"
                },
                "filename": {
                    "type": "string",
                    "description": "Optional filename for the image",
                    "default": "uploaded_image"
                }
            },
            "required": ["image_url"]
        }
    }
}
# Ollama function-calling schema: catch-all Discord server management action.
# One tool with an "action_type" discriminator; the remaining properties are
# optional and only meaningful for the action types that use them.
discord_action_tool = {
    "type": "function",
    "function": {
        "name": "discord_action",
        "description": "Perform Discord server management and moderation tasks. Use this tool for: sending messages to channels/DMs, getting user/channel/server info, listing channels, banning/kicking/timing out users, and any Discord-specific actions. Examples: 'send a message to #general', 'DM @user hello', 'ban @user', 'kick @spammer', 'timeout @user', 'get info about @user'",
        "parameters": {
            "type": "object",
            "properties": {
                # Discriminator: selects which Discord operation to perform.
                "action_type": {
                    "type": "string",
                    "description": "Type of action to perform",
                    "enum": ["send_message", "send_dm", "get_user_info", "get_channel_info", "get_guild_info", "list_channels", "ban_user", "kick_user", "timeout_user", "react_to_message", "get_message", "list_online", "send_embed", "channel_history", "search_messages", "server_emojis", "check_status", "avatar_full"]
                },
                "target_id": {
                    "type": "string",
                    "description": "Target ID (user_id for get_user_info, guild_id for get_guild_info/list_channels)"
                },
                "message": {
                    "type": "string",
                    "description": "Message content for send_message action"
                },
                "channel_id": {
                    "type": "string",
                    "description": "Channel ID for send_message or get_channel_info actions"
                },
                "role_name": {
                    "type": "string",
                    "description": "Role name for role-related actions"
                },
                "reason": {
                    "type": "string",
                    "description": "Reason for moderation actions"
                },
                "duration_minutes": {
                    "type": "integer",
                    "description": "Duration in minutes for timeout_user action (default: 10)",
                    "default": 10
                },
                "message_id": {
                    "type": "string",
                    "description": "Message ID for react_to_message or get_message actions"
                },
                "emoji": {
                    "type": "string",
                    "description": "Emoji for react_to_message action (unicode or custom emoji)"
                },
                "embed_title": {
                    "type": "string",
                    "description": "Title for send_embed action"
                },
                "embed_description": {
                    "type": "string",
                    "description": "Description for send_embed action"
                },
                "embed_color": {
                    "type": "string",
                    "description": "Hex color for send_embed action (e.g. #ff0000)"
                },
                "search_query": {
                    "type": "string",
                    "description": "Search query for search_messages action"
                },
                "limit": {
                    "type": "integer",
                    "description": "Number limit for channel_history or search results (default: 10)",
                    "default": 10
                }
            },
            # Only the discriminator is mandatory; everything else is per-action.
            "required": ["action_type"]
        }
    }
}
# =============================================================================
# TOOL EXECUTION FUNCTIONS
# =============================================================================
async def execute_web_search(query: str, num_results: int = 5) -> dict:
    """Execute web search using Serper API.

    Args:
        query: Search query string.
        num_results: Desired result count, capped at 10 (Serper limit).

    Returns:
        {'query', 'results': [{title, link, snippet}, ...]} on success,
        or {'error': message} on failure.
    """
    try:
        serper_api_key = os.getenv('SERPER_API_KEY')
        if not serper_api_key:
            return {"error": "SERPER_API_KEY not configured"}
        url = "https://google.serper.dev/search"
        headers = {
            'X-API-KEY': serper_api_key,
            'Content-Type': 'application/json'
        }
        payload = {
            'q': query,
            'num': min(num_results, 10)
        }
        async with aiohttp.ClientSession() as session:
            # timeout added: previously this request could hang indefinitely
            # on an unresponsive host (sibling web_search() already uses 30s).
            async with session.post(url, json=payload, headers=headers, timeout=30) as response:
                if response.status == 200:
                    data = await response.json()
                    results = []
                    for item in data.get('organic', []):
                        results.append({
                            'title': item.get('title', ''),
                            'link': item.get('link', ''),
                            'snippet': item.get('snippet', '')
                        })
                    return {'query': query, 'results': results}
                else:
                    return {"error": f"Search API error: {response.status}"}
    except Exception as e:
        return {"error": f"Search failed: {str(e)}"}
async def execute_web_scrape(url: str, max_length: int = 2000) -> dict:
    """Scrape content from URL and return it as plain text.

    Strips HTML tags naively with a regex, collapses whitespace, and
    truncates the text to max_length characters (appending "...").

    Returns {'url', 'content'} on success or {'error': message}.
    """
    try:
        async with aiohttp.ClientSession() as session:
            # timeout added: previously an unresponsive host hung forever.
            async with session.get(url, timeout=30) as response:
                if response.status == 200:
                    content = await response.text()
                    # Basic content extraction: drop tags, collapse whitespace.
                    # (uses the module-level `re` import; the old local import
                    # was redundant)
                    clean_content = re.sub(r'<[^>]+>', '', content)
                    clean_content = ' '.join(clean_content.split())
                    if len(clean_content) > max_length:
                        clean_content = clean_content[:max_length] + "..."
                    return {'url': url, 'content': clean_content}
                else:
                    return {"error": f"HTTP {response.status}"}
    except Exception as e:
        return {"error": f"Scraping failed: {str(e)}"}
async def execute_calculate(expression: str) -> dict:
    """Safely evaluate mathematical expressions.

    Walks the parsed AST instead of calling eval(): only numeric literals,
    + - * / and a size-guarded ** can execute.  The old character whitelist
    still fed eval(), which allowed runaway exponentiations such as
    9**9**9**9 to hang the process.

    Returns {'expression', 'result'} on success or {'error': message}.
    """
    try:
        # First-line defence: reject anything beyond digits and math symbols.
        allowed_chars = set('0123456789+-*/.() ')
        if not all(c in allowed_chars for c in expression):
            return {"error": "Invalid characters in expression"}
        import ast
        import operator
        binops = {
            ast.Add: operator.add, ast.Sub: operator.sub,
            ast.Mult: operator.mul, ast.Div: operator.truediv,
        }
        unops = {ast.USub: operator.neg, ast.UAdd: operator.pos}

        def _eval(node):
            # Numeric literal.
            if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
                return node.value
            # Unary +/-.
            if isinstance(node, ast.UnaryOp) and type(node.op) in unops:
                return unops[type(node.op)](_eval(node.operand))
            if isinstance(node, ast.BinOp):
                if isinstance(node.op, ast.Pow):
                    base, exp = _eval(node.left), _eval(node.right)
                    # Guard against denial-of-service via huge exponentiation.
                    if abs(exp) > 128 or abs(base) > 10 ** 6:
                        raise ValueError("exponentiation operands too large")
                    return base ** exp
                if type(node.op) in binops:
                    return binops[type(node.op)](_eval(node.left), _eval(node.right))
            raise ValueError(f"unsupported syntax: {type(node).__name__}")

        result = _eval(ast.parse(expression, mode='eval').body)
        return {'expression': expression, 'result': result}
    except Exception as e:
        return {"error": f"Calculation failed: {str(e)}"}
async def execute_get_time() -> dict:
    """Return the current local date/time as ISO, formatted, and timezone strings."""
    current = datetime.now()
    return {
        'datetime': current.isoformat(),
        'formatted': current.strftime("%Y-%m-%d %H:%M:%S"),
        'timezone': str(current.astimezone().tzinfo),
    }
# Dispatch table mapping tool names (as emitted by the model in tool calls)
# to the corresponding execute_* coroutine defined above.
TOOL_FUNCTIONS = {
    'web_search': execute_web_search,
    'web_scrape': execute_web_scrape,
    'calculate': execute_calculate,
    'get_time': execute_get_time
}
# Consolidated tool-schema list passed to Ollama chat requests.
# NOTE(review): descriptions/parameters here overlap with the individual
# *_tool dicts defined earlier in this file; keep the two in sync when editing.
ALL_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "web_search",
            "description": "Search the web for current information",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search query"},
                    "num_results": {"type": "integer", "description": "Number of results (1-10)", "default": 5}
                },
                "required": ["query"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "web_scrape",
            "description": "Scrape content from a specific URL",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {"type": "string", "description": "URL to scrape"},
                    "max_length": {"type": "integer", "description": "Max content length", "default": 2000}
                },
                "required": ["url"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "calculate",
            "description": "Perform mathematical calculations",
            "parameters": {
                "type": "object",
                "properties": {
                    "expression": {"type": "string", "description": "Math expression to evaluate"}
                },
                "required": ["expression"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_time",
            "description": "Get current date and time",
            "parameters": {"type": "object", "properties": {}}
        }
    }
]
# =============================================================================
# PERFORMANCE MONITORING (FROM MERGED BOT)
# =============================================================================
try:
import psutil
import GPUtil
import threading
from ollama import AsyncClient
PERFORMANCE_MONITORING = True
except ImportError:
PERFORMANCE_MONITORING = False
print("[WARNING] Performance monitoring disabled - install: pip install psutil gputil ollama")
class PerformanceMonitor:
    """Monitor system performance and adjust Ollama settings dynamically.

    Samples CPU/memory (and GPU when available) via psutil/GPUtil and
    suggests concurrency/keep-alive adjustments based on load thresholds.
    """

    def __init__(self):
        # Probe for GPUs defensively: GPUtil.getGPUs() can raise at runtime
        # (e.g. missing NVIDIA driver/NVML) even when the import succeeded,
        # which previously crashed construction of this class.
        self.gpu_available = False
        if PERFORMANCE_MONITORING and GPUtil:
            try:
                self.gpu_available = len(GPUtil.getGPUs()) > 0
            except Exception as e:
                print(f"GPU detection error: {e}")
        self.last_check = time.time()
        self.performance_history = []  # rolling window of recent stat samples

    def get_system_stats(self) -> Dict:
        """Get current system performance stats (CPU, memory, optional GPU)."""
        if not PERFORMANCE_MONITORING:
            # Neutral placeholder values when psutil/GPUtil are unavailable.
            return {'cpu_percent': 50, 'memory_percent': 50, 'timestamp': time.time()}
        stats = {
            'cpu_percent': psutil.cpu_percent(interval=1),  # blocks ~1s to sample
            'memory_percent': psutil.virtual_memory().percent,
            'timestamp': time.time()
        }
        if self.gpu_available:
            try:
                gpus = GPUtil.getGPUs()
                if gpus:
                    gpu = gpus[0]  # Use first GPU
                    stats.update({
                        'gpu_load': gpu.load * 100,
                        'gpu_memory_percent': (gpu.memoryUsed / gpu.memoryTotal) * 100,
                        'gpu_temperature': gpu.temperature
                    })
            except Exception as e:
                print(f"GPU monitoring error: {e}")
        return stats

    def should_adjust_settings(self) -> Dict[str, Any]:
        """Determine if Ollama settings should be adjusted based on load."""
        stats = self.get_system_stats()
        self.performance_history.append(stats)
        # Keep only last 10 readings
        if len(self.performance_history) > 10:
            self.performance_history.pop(0)
        adjustments = {}
        # If GPU memory is high, reduce parallel requests
        if stats.get('gpu_memory_percent', 0) > 85:
            adjustments['reduce_parallel'] = True
        # If CPU is high, increase keep_alive to avoid reloading models
        if stats.get('cpu_percent', 0) > 80:
            adjustments['increase_keep_alive'] = True
        # If system is idle, we can be more aggressive
        if (stats.get('cpu_percent', 0) < 30 and
                stats.get('gpu_memory_percent', 0) < 50):
            adjustments['increase_parallel'] = True
        return adjustments
# =============================================================================
# BLAZING FAST OLLAMA CLIENT (FROM MERGED BOT)
# =============================================================================
class OptimizedOllamaClient:
    """Enhanced Ollama client with performance optimizations - BLAZING FAST VERSION

    Wraps either the ollama AsyncClient (preferred, when available) or a raw
    aiohttp POST to /api/chat.  Enforces a soft concurrency cap on chat()
    calls and runs a background daemon thread that raises/lowers that cap
    based on PerformanceMonitor readings.
    """
    def __init__(self, base_url="http://localhost:11434"):
        # base_url: Ollama server endpoint.
        self.base_url = base_url
        self.performance_monitor = PerformanceMonitor()
        self.model_cache = {}  # Track loaded models
        self.active_requests = 0   # number of chat() calls currently in flight
        self.max_concurrent = 4    # soft cap; tuned by _monitor_performance
        self.session = None  # persistent aiohttp session (see create_session)
        if PERFORMANCE_MONITORING:
            self.async_client = AsyncClient(host=base_url)
            # Start performance monitoring in a daemon thread so it never
            # blocks interpreter shutdown.
            self.monitor_thread = threading.Thread(target=self._monitor_performance, daemon=True)
            self.monitor_thread.start()
        else:
            self.async_client = None
    def _monitor_performance(self):
        """Background performance monitoring and adjustment.

        Loops forever in the daemon thread: every 30s asks the monitor
        whether to shrink (min 1) or grow (max 8) the concurrency cap.
        On error, backs off to a 60s sleep and keeps running.
        """
        while True:
            try:
                adjustments = self.performance_monitor.should_adjust_settings()
                if adjustments.get('reduce_parallel'):
                    self.max_concurrent = max(1, self.max_concurrent - 1)
                    print(f"[INFO] Reduced concurrent requests to {self.max_concurrent}")
                elif adjustments.get('increase_parallel'):
                    self.max_concurrent = min(8, self.max_concurrent + 1)
                    print(f"🔺 Increased concurrent requests to {self.max_concurrent}")
                time.sleep(30)  # Check every 30 seconds
            except Exception as e:
                print(f"Performance monitor error: {e}")
                time.sleep(60)
    async def chat(self, model: str, messages: List[Dict], tools: List[Dict] = None, **kwargs):
        """BLAZING FAST optimized chat with performance monitoring.

        Waits (polling) for a free concurrency slot, merges Q4-tuned
        generation options into kwargs, formats tool messages with BPE tags,
        then dispatches via AsyncClient or the aiohttp fallback.

        Returns the raw Ollama chat response; re-raises on request failure.
        """
        # Wait for available slot (simple 100ms polling loop against the cap).
        while self.active_requests >= self.max_concurrent:
            await asyncio.sleep(0.1)
        self.active_requests += 1
        start_time = time.time()
        try:
            # Add performance options for Q4 models (YOUR FAST SETTINGS).
            # NOTE(review): this overwrites any caller-supplied values for
            # these keys in kwargs['options'] — confirm that is intended.
            options = kwargs.get('options', {})
            # Optimize for Q4 models - BLAZING FAST SETTINGS
            options.update({
                'num_ctx': 8192,  # Large context for Discord conversations
                'temperature': 0.7,
                'top_p': 0.9,
                'repeat_penalty': 1.1,
                'num_predict': 512,  # Reasonable response length
                'num_batch': 128,  # Batch size for Q4 models
                'num_thread': min(8, psutil.cpu_count() if PERFORMANCE_MONITORING else 8),
                'num_gpu_layers': -1 if self.performance_monitor.gpu_available else 0
            })
            kwargs['options'] = options
            # Format messages with proper BPE tags
            formatted_messages = self.format_messages_for_bpe(messages)
            if self.async_client:
                # Use AsyncClient for maximum speed
                chat_params = {
                    'model': model,
                    'messages': formatted_messages,
                    'stream': False,
                    **kwargs
                }
                # Add tools if provided
                if tools:
                    chat_params['tools'] = tools
                response = await self.async_client.chat(**chat_params)
            else:
                # Fallback to aiohttp
                response = await self._fallback_chat(model, formatted_messages, tools, **kwargs)
            # Track performance
            end_time = time.time()
            response_time = end_time - start_time
            # Log slow responses
            if response_time > 10:
                print(f"[WARNING] Slow response: {response_time:.2f}s for {model}")
            else:
                print(f"[INFO] Fast response: {response_time:.2f}s")
            return response
        except Exception as e:
            print(f"Ollama request error: {e}")
            raise
        finally:
            # Always release the concurrency slot, even on failure.
            self.active_requests -= 1
    async def _fallback_chat(self, model: str, messages: List[Dict], tools: List[Dict] = None, **kwargs):
        """Fallback aiohttp method: POST directly to Ollama's /api/chat.

        Used when the ollama AsyncClient package is unavailable.  Raises on
        any non-200 status.
        """
        async with aiohttp.ClientSession() as session:
            payload = {
                "model": model,
                "messages": messages,
                "stream": False,
                **kwargs
            }
            if tools:
                payload["tools"] = tools
            async with session.post(f"{self.base_url}/api/chat", json=payload) as response:
                if response.status == 200:
                    return await response.json()
                else:
                    raise Exception(f"Ollama error: {response.status}")
    async def create_session(self):
        """Create persistent aiohttp session (idempotent)."""
        if self.session is None:
            self.session = aiohttp.ClientSession()
    async def close_session(self):
        """Close persistent aiohttp session if open, and reset it to None."""
        if self.session and not self.session.closed:
            await self.session.close()
            self.session = None
    def format_messages_for_bpe(self, messages: List[Dict]) -> List[Dict]:
        """Format messages with proper BPE tags for optimal tokenization.

        Only 'tool' role messages are rewritten (content wrapped in
        <tool_response> tags); all other messages pass through unchanged.
        """
        formatted = []
        for msg in messages:
            role = msg.get('role', 'user')
            content = msg.get('content', '')
            if role == 'tool':
                # Wrap tool responses in proper BPE tags
                formatted.append({
                    'role': 'tool',
                    'content': f"<tool_response>\n{content}\n</tool_response>",
                    'name': msg.get('name', 'unknown_tool')
                })
            else:
                formatted.append(msg)
        return formatted
# =============================================================================
# TOOL IMPLEMENTATIONS
# =============================================================================
async def web_search(query: str, num_results: int = 5) -> dict:
    """Search the web using Serper API.

    Args:
        query: Search query string.
        num_results: Desired result count; clamped to the 1-10 range the tool
            schema advertises (previously forwarded to the API unchecked).

    Returns:
        {"results": [{title, snippet, link}, ...], "query": query} on success,
        or {"error": message} on failure.
    """
    print(f"[TOOL] web_search called with query: {query}, num_results: {num_results}")
    try:
        serper_api_key = os.getenv('SERPER_API_KEY')
        if not serper_api_key:
            return {"error": "SERPER_API_KEY not configured"}
        # Clamp to the documented 1-10 range (consistent with execute_web_search).
        num_results = max(1, min(int(num_results), 10))
        headers = {
            "X-API-KEY": serper_api_key,
            "Content-Type": "application/json"
        }
        payload = json.dumps({"q": query, "num": num_results})
        async with aiohttp.ClientSession() as session:
            async with session.post("https://google.serper.dev/search",
                                    headers=headers, data=payload, timeout=30) as response:
                if response.status != 200:
                    return {"error": f"Search failed with status {response.status}"}
                data = await response.json()
                items = []
                for it in data.get('organic', [])[:num_results]:
                    items.append({
                        "title": it.get('title', 'N/A'),
                        "snippet": it.get('snippet', 'N/A'),
                        "link": it.get('link', 'N/A')
                    })
                result_dict = {"results": items, "query": query}
                print(f"[TOOL] web_search success: Found {len(items)} results")
                return result_dict
    except Exception as e:
        error_msg = f"Search error: {str(e)}"
        print(f"[TOOL] web_search error: {error_msg}")
        return {"error": error_msg}
async def web_scrape(url: str) -> dict:
    """Scrape a webpage using Serper API.

    Posts the target URL to Serper's scrape endpoint and returns the page
    title plus the first 2000 characters of extracted text, or an error dict.
    """
    print(f"[TOOL] web_scrape called with url: {url}")
    try:
        api_key = os.getenv('SERPER_API_KEY')
        if not api_key:
            return {"error": "SERPER_API_KEY not configured"}
        request_headers = {"X-API-KEY": api_key, "Content-Type": "application/json"}
        body = json.dumps({"url": url})
        async with aiohttp.ClientSession() as http:
            async with http.post("https://scrape.serper.dev",
                                 headers=request_headers, data=body, timeout=45) as resp:
                if resp.status != 200:
                    return {"error": f"Scrape failed with status {resp.status}"}
                payload_data = await resp.json()
                scraped = {
                    "url": url,
                    "title": payload_data.get('title', 'N/A'),
                    "content": payload_data.get('text', 'No content found')[:2000]  # limit content
                }
                print(f"[TOOL] web_scrape success: Scraped {len(scraped['content'])} characters")
                return scraped
    except Exception as e:
        error_msg = f"Scrape error: {str(e)}"
        print(f"[TOOL] web_scrape error: {error_msg}")
        return {"error": error_msg}
async def calculate(expression: str) -> dict:
    """Safely evaluate math expression.

    Accepts ^ as exponentiation and ×/÷ as multiply/divide aliases.
    Evaluation walks the parsed AST instead of calling eval(), so only
    numeric literals and arithmetic operators can execute, and oversized
    exponentiations are rejected — the old eval() path let inputs like
    9^9^9^9 hang the process.

    Returns {"result": str, "expression": normalized_expr} or {"error": msg}.
    """
    print(f"[TOOL] calculate called with expression: {expression}")
    try:
        # Normalize unicode/alternate operators to Python syntax.
        expr = str(expression).replace('^', '**').replace('×', '*').replace('÷', '/')
        if not re.fullmatch(r"[0-9\.\+\-\*/\(\) ,]+", expr):
            return {"error": "Invalid characters in expression"}
        import ast
        import operator
        binops = {
            ast.Add: operator.add, ast.Sub: operator.sub,
            ast.Mult: operator.mul, ast.Div: operator.truediv,
        }
        unops = {ast.USub: operator.neg, ast.UAdd: operator.pos}

        def _eval(node):
            # Numeric literal.
            if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
                return node.value
            # Unary +/-.
            if isinstance(node, ast.UnaryOp) and type(node.op) in unops:
                return unops[type(node.op)](_eval(node.operand))
            if isinstance(node, ast.BinOp):
                if isinstance(node.op, ast.Pow):
                    base, exp = _eval(node.left), _eval(node.right)
                    # Guard against denial-of-service via huge exponents.
                    if abs(exp) > 128 or abs(base) > 10 ** 6:
                        raise ValueError("exponentiation operands too large")
                    return base ** exp
                if type(node.op) in binops:
                    return binops[type(node.op)](_eval(node.left), _eval(node.right))
            raise ValueError(f"unsupported syntax: {type(node).__name__}")

        result = _eval(ast.parse(expr, mode='eval').body)
        result_dict = {"result": str(result), "expression": expr}
        print(f"[TOOL] calculate success: {result_dict}")
        return result_dict
    except Exception as e:
        error_msg = f"Calculation error: {str(e)}"
        print(f"[TOOL] calculate error: {error_msg}")
        return {"error": error_msg}
async def get_time() -> dict:
    """Report the current local date and time in several string/int formats."""
    print(f"[TOOL] get_time called")
    try:
        from datetime import datetime as _dt
        moment = _dt.now()
        result_dict = {
            "current_time": moment.strftime("%I:%M:%S %p"),
            "current_date": moment.strftime("%A, %B %d, %Y"),
            "timezone": "Local Time",
            "iso_format": moment.isoformat(),
            "unix_timestamp": int(moment.timestamp()),
        }
        print(f"[TOOL] get_time success: {result_dict['current_time']}")
        return result_dict
    except Exception as e:
        error_msg = f"Time retrieval error: {str(e)}"
        print(f"[TOOL] get_time error: {error_msg}")
        return {"error": error_msg}
async def get_weather(city: str) -> dict:
    """Get current weather for a city using free weather API with improved geocoding.

    Geocodes the city via Open-Meteo's geocoding API, then fetches current
    conditions from the Open-Meteo forecast API.

    Returns {city, temperature, condition, wind_speed, wind_direction} on
    success, or {"error": message} when geocoding/weather lookup fails.
    """
    print(f"[TOOL] get_weather called with city: {city}")
    try:
        async with aiohttp.ClientSession() as session:
            # Try Open-Meteo's geocoding API first (more reliable)
            geocode_url = f"https://geocoding-api.open-meteo.com/v1/search?name={city}&count=1"
            try:
                async with session.get(geocode_url, timeout=15) as geocode_response:
                    if geocode_response.status == 200:
                        geocode_data = await geocode_response.json()
                        results = geocode_data.get('results', [])
                        if results:
                            location = results[0]
                            latitude = location.get('latitude')
                            longitude = location.get('longitude')
                            city_name = location.get('name', city)
                            if latitude is not None and longitude is not None:
                                # Get weather data.
                                # BUGFIX: the query string previously contained the
                                # mojibake "¤t_weather=true" (a mangled HTML entity),
                                # which broke the current_weather parameter.
                                weather_url = f"https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}&current_weather=true&temperature_unit=celsius"
                                async with session.get(weather_url, timeout=15) as weather_response:
                                    if weather_response.status != 200:
                                        return {"error": f"Weather API failed with status {weather_response.status}"}
                                    weather_data = await weather_response.json()
                                    current = weather_data.get('current_weather', {})
                                    # Convert WMO weather code to a human-readable description.
                                    weather_code = current.get('weathercode', 0)
                                    weather_descriptions = {
                                        0: "Clear sky", 1: "Mainly clear", 2: "Partly cloudy", 3: "Overcast",
                                        45: "Fog", 48: "Depositing rime fog", 51: "Light drizzle", 53: "Moderate drizzle", 55: "Dense drizzle",
                                        56: "Light freezing drizzle", 57: "Dense freezing drizzle", 61: "Slight rain", 63: "Moderate rain", 65: "Heavy rain",
                                        66: "Light freezing rain", 67: "Heavy freezing rain", 71: "Slight snow", 73: "Moderate snow", 75: "Heavy snow",
                                        77: "Snow grains", 80: "Slight rain showers", 81: "Moderate rain showers", 82: "Violent rain showers",
                                        85: "Slight snow showers", 86: "Heavy snow showers", 95: "Thunderstorm", 96: "Thunderstorm with hail", 99: "Thunderstorm with heavy hail"
                                    }
                                    result = {
                                        "city": city_name,
                                        "temperature": f"{current.get('temperature', 'N/A')}°C",
                                        "condition": weather_descriptions.get(weather_code, f"Weather code {weather_code}"),
                                        "wind_speed": f"{current.get('windspeed', 'N/A')} km/h",
                                        "wind_direction": f"{current.get('winddirection', 'N/A')}°"
                                    }
                                    print(f"[TOOL] get_weather success: {result}")
                                    return result
            except Exception as e:
                print(f"[WARNING] Open-Meteo geocoding failed: {e}")
            # Falls through here when geocoding returned nothing usable.
            return {"error": f"Could not get weather for '{city}'"}
    except Exception as e:
        error_msg = f"Weather service error: {str(e)}"
        print(f"[TOOL] get_weather error: {error_msg}")
        return {"error": error_msg}
async def news_search(query: str, num_results: int = 5) -> dict:
"""Search for recent news using dedicated Serper News API."""
print(f"[TOOL] news_search called with query: {query}, num_results: {num_results}")
try:
serper_api_key = os.getenv('SERPER_API_KEY')
if not serper_api_key:
return {"error": "SERPER_API_KEY not configured"}
headers = {
"X-API-KEY": serper_api_key,
"Content-Type": "application/json"
}
payload = json.dumps({"q": query, "num": num_results})
async with aiohttp.ClientSession() as session:
async with session.post("https://google.serper.dev/news",
headers=headers, data=payload, timeout=30) as response:
if response.status != 200:
return {"error": f"News search failed with status {response.status}"}
data = await response.json()
items = []