-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_seed_timing.py
More file actions
162 lines (132 loc) · 4.76 KB
/
test_seed_timing.py
File metadata and controls
162 lines (132 loc) · 4.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/env python3
"""
Test if seed timing affects generation results.
"""
import sys
import torch
import numpy as np
from pathlib import Path
# Add src to path
sys.path.append(str(Path(__file__).parent / "src"))
from models.model_factory import ModelFactory
def _reset_seeds(seed: int = 42) -> None:
    """Reset the torch (CPU + every CUDA device) and numpy RNGs to *seed*."""
    torch.manual_seed(seed)
    np.random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)


def _generate(model, prompt: str):
    """Run one generation with the fixed greedy decoding settings shared by all tests."""
    return model.generate(
        prompt,
        max_tokens=512,
        temperature=0.0,
        top_p=1.0,
        do_sample=False,
        num_return_sequences=1
    )


def test_seed_timing():
    """Test if seed timing affects generation results.

    Four experiments against one GSM8K-style prompt:
      1. re-seed immediately before each of 3 generations,
      2. seed once, then run 3 generations back to back,
      3. compare the first result of (1) against the first result of (2),
      4. seed + burn some RNG state before generating ("framework" path)
         vs seed and generate immediately ("direct" path).
    """
    print("🔬 Test Seed Timing")
    print("=" * 60)

    # Load model (project-local factory; path/config are site-specific)
    print("🔧 Loading model...")
    model = ModelFactory.create_model(
        model_type="deepseek",
        model_name="/raid/LLM/llama3.1-8b-instruct",
        config={"gpu_id": 0}
    )
    model.load_model()

    # Test problem
    problem = "Janet's ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"
    prompt = f"Q: {problem}\nA:"
    print(f"Problem: {problem}")
    print()

    # Test 1: Set seed before each generation
    print("🔧 Test 1: Set seed before each generation")
    print("-" * 50)
    results = []
    for i in range(3):
        print(f"Generation {i+1}:")
        # Re-seed immediately before every call so each generation starts
        # from an identical RNG state.
        _reset_seeds(42)
        generated = _generate(model, prompt)
        results.append(generated)
        print(f" Length: {len(generated)}")
        print(f" First 100 chars: {generated[:100]}")
        print()

    # Check if all results are identical
    all_identical = all(result == results[0] for result in results)
    print(f"All results identical: {all_identical}")

    # Test 2: Set seed once at the beginning
    print("\n🔧 Test 2: Set seed once at the beginning")
    print("-" * 50)
    # Seeded once; RNG state is free to advance across the generations.
    _reset_seeds(42)
    results2 = []
    for i in range(3):
        print(f"Generation {i+1}:")
        generated = _generate(model, prompt)
        results2.append(generated)
        print(f" Length: {len(generated)}")
        print(f" First 100 chars: {generated[:100]}")
        print()

    # Check if all results are identical
    all_identical2 = all(result == results2[0] for result in results2)
    print(f"All results identical: {all_identical2}")

    # Test 3: Compare results between the two approaches
    print("\n🔧 Test 3: Compare approaches")
    print("-" * 50)
    print(f"Test 1 first result: {results[0][:100]}")
    print(f"Test 2 first result: {results2[0][:100]}")
    print(f"Results identical: {results[0] == results2[0]}")

    # Test 4: Check if the issue is in our framework vs direct generation
    print("\n🔧 Test 4: Framework vs Direct Generation")
    print("-" * 50)
    # Simulate our framework approach (set seed in generation method)
    print("Simulating our framework approach:")
    _reset_seeds(42)
    # Deliberately burn RNG state between seeding and generation, mimicking
    # the intermediate processing our framework performs. These values are
    # intentionally unused — only the RNG-state side effect matters.
    dummy_tensor = torch.randn(10, 10)
    dummy_np = np.random.randn(10, 10)
    framework_result = _generate(model, prompt)
    print(f"Framework result: {framework_result[:100]}")

    # Simulate direct generation approach (set seed right before generation)
    print("\nSimulating direct generation approach:")
    _reset_seeds(42)
    # Generate immediately — no intervening RNG draws.
    direct_result = _generate(model, prompt)
    print(f"Direct result: {direct_result[:100]}")
    print(f"Results identical: {framework_result == direct_result}")
# Script entry point: run the seed-timing experiment when executed directly.
if __name__ == "__main__":
    test_seed_timing()