-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_memory_optimized.py
More file actions
113 lines (95 loc) · 3.58 KB
/
test_memory_optimized.py
File metadata and controls
113 lines (95 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python3
"""
Memory-optimized test for RAG system
"""
import gc
import os
import subprocess
import sys
from pathlib import Path
# Add project to path
sys.path.append(str(Path(__file__).parent))
def test_memory_optimized_rag():
    """Smoke-test the RAG pipeline end-to-end with memory-conservative settings.

    Builds a pipeline configured with small chunks, tiny embedding batches and
    a low `top_k`, runs one trial query, and prints the process RSS at each
    stage so excessive memory growth is visible before deployment.

    Returns:
        bool: True if the pipeline initialized and the test ran to completion,
        False on any failure (the traceback is printed to stdout).
    """
    try:
        print("🧠 Testing Memory-Optimized RAG System...")

        def get_memory():
            """Return this process's current RSS in MB, or 0 if unreadable.

            Uses `ps -o rss` (prints a header line, then RSS in KB).
            Best-effort: any parse problem yields 0 rather than raising,
            so a broken `ps` never fails the whole test.
            """
            # List-form subprocess.run: no shell involved, arguments passed
            # verbatim (preferred over os.popen with an interpolated string).
            result = subprocess.run(
                ["ps", "-o", "rss", "-p", str(os.getpid())],
                capture_output=True,
                text=True,
                check=False,
            )
            lines = result.stdout.splitlines()
            if len(lines) > 1:
                try:
                    return int(lines[1].strip()) / 1024  # KB -> MB
                except ValueError:
                    pass  # unexpected ps output; fall through to 0
            return 0

        initial_memory = get_memory()
        print(f"📊 Initial memory: {initial_memory:.1f} MB")

        # Project import is inside the try so a missing/broken install is
        # reported as a test failure instead of crashing the script.
        from src.main_pipeline import load_config, RAGPipeline

        # Memory-optimized settings: smaller chunks, very small embedding
        # batches, CPU-only inference, and fewer retrieved results.
        config = {
            'ingestion': {
                'chunk_size': 500,  # Smaller chunks
                'chunk_overlap': 100,
                'max_file_size_mb': 10
            },
            'embeddings': {
                'model': 'all-MiniLM-L6-v2',
                'batch_size': 4,  # Very small batches
                'device': 'cpu'
            },
            'vector_db': {
                'persist_directory': './data/vector_db'
            },
            'retrieval': {
                'top_k': 3  # Fewer results
            },
            'generation': {
                'provider': 'ollama',
                'model': 'llama2',
                'max_tokens': 500
            },
            'ocr': {
                'enabled': True,
                'language': 'eng'
            }
        }

        print("⚙️ Initializing RAG pipeline with memory optimizations...")
        pipeline = RAGPipeline(config)

        after_load_memory = get_memory()
        print(f"📊 Memory after loading: {after_load_memory:.1f} MB")
        print(f"📈 Memory increase: {after_load_memory - initial_memory:.1f} MB")

        # A query against an empty store may legitimately fail; that is not
        # a test failure, so it gets its own narrow try/except.
        print("🔍 Testing simple query...")
        try:
            response = pipeline.query_documents("What is AI?", top_k=3)
            print(f"✅ Query successful: {response.answer[:100]}...")
        except Exception as e:
            print(f"⚠️ Query failed (expected without documents): {e}")

        final_memory = get_memory()
        print(f"📊 Final memory: {final_memory:.1f} MB")

        # Drop the pipeline and force a collection so the "after cleanup"
        # reading reflects released memory.
        del pipeline
        gc.collect()

        cleanup_memory = get_memory()
        print(f"📊 Memory after cleanup: {cleanup_memory:.1f} MB")
        print("✅ Memory-optimized RAG test completed!")

        # 400 MB is the heuristic deployment threshold for this config.
        max_memory_used = max(after_load_memory, final_memory)
        if max_memory_used > 400:
            print("⚠️ High memory usage detected. Consider:")
            print("   - Using smaller batch sizes")
            print("   - Processing smaller documents")
            print("   - Using the memory-optimized config")
        else:
            print("✅ Memory usage is acceptable for deployment")

        return True

    except Exception as e:
        # Top-level test boundary: report and signal failure to the caller.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Run the smoke test and report whether the current settings look
    # deployment-ready.
    message = (
        "\n🎉 Ready for deployment with memory optimizations!"
        if test_memory_optimized_rag()
        else "\n💡 Consider using memory-optimized settings"
    )
    print(message)