-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdeepseek_final
More file actions
122 lines (107 loc) · 5.58 KB
/
deepseek_final
File metadata and controls
122 lines (107 loc) · 5.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
import torch
import gc
import re
import json

# Release any stale CUDA allocations from a previous run before loading the model.
torch.cuda.empty_cache()
gc.collect()

model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the weights in their checkpoint dtype and move them to the chosen device.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto").to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# The checkpoint defines no pad token; reuse EOS so padding during generation works
# and transformers does not warn about a missing pad_token_id.
tokenizer.pad_token_id = tokenizer.eos_token_id
model.generation_config.pad_token_id = tokenizer.eos_token_id
# -------- Compact English rules --------
# Rule text injected into the prompt's <think> section.  It constrains the model
# to: output a bare JSON array (no fences/HTML), use only the four allowed keys,
# pick "type" from the fixed topic taxonomy, and derive "next_step" from the
# student's RAW steps (last correct index c -> "step {c+1}", none correct -> "step 1").
# NOTE: this is runtime prompt text consumed by the model — do not reword it
# casually; the downstream JSON extraction depends on the format it enforces.
RULES_EN = (
    "Rules: Output ONLY a JSON array. No code fences, no extra text, no HTML, no tags. "
    "Allowed keys: \"step n\", \"answer\", \"type\", \"next_step\" only. "
    "At least 3 step objects. All text in English. "
    "The value for \"type\" must be EXACTLY ONE from this list:\n"
    "[Properties of Natural Numbers, Integers and Rational Numbers, Rational Numbers and Decimals, Letters and Expressions, "
    "Linear Equations, Inequalities, Coordinate Plane and Graphs, Basic Figures, Transformations of Figures, Data Collection and Organization, "
    "Mean and Median, Possibility, Rational Numbers and Repeating Decimals, Algebraic Operations, Linear Functions, Simultaneous Linear Equations, "
    "Properties of Figures, Constructions and Congruence, Triangles and Quadrilaterals, Data Representation and Interpretation, Probability, "
    "Real Numbers and Square Roots, Factorization, Quadratic Equations, Quadratic Functions, Pythagorean Theorem, Equation of a Circle, "
    "Understanding and Applications of Statistics]\n"
    "next_step policy: The user provides RAW student steps as an array of {index, expression}. "
    "Let c be the last index that is correct and meaningfully aligned with your generated steps; set next_step to \"step {c+1}\". "
    "If none align, use c=0 → next_step = \"step 1\". Do NOT base next_step on your step count."
)
# Tiny few-shot to lock format.
# One worked example (question + student steps -> <think>/<answer> reply) that
# anchors the exact reply shape the model must reproduce.  NOTE: runtime prompt
# text — the model imitates these bytes, so keep them stable.  Also note this
# example contains a full <answer>...</answer> pair, which therefore appears in
# the final prompt fed to the model.
FEW_SHOT = (
    'User: {"question": "Compute 2+3."}\n'
    '---\n'
    '{"steps": [{"index":1,"expression":"Add 2 and 3"}]}\n'
    'Assistant: <think>(Simple arithmetic; produce at least 3 steps and the required fields)</think>\n'
    '<answer>\n'
    '[{"step 1": "Identify the addends: 2 and 3."}, {"step 2": "Compute 2 + 3 = 5."}, {"step 3": "Confirm the result."}, '
    '{"answer": "5"}, {"type": "Integers and Rational Numbers"}, {"next_step": "step 2"}]\n'
    '</answer>\n'
)
class PromptTemplate:
    """Builds the chat-style prompt fed to the model.

    Layout: fixed conversation header, the FEW_SHOT example, the user payload,
    the rule text wrapped in <think> tags, then an *opened* <answer> section
    that primes the model to emit the JSON array immediately.
    """

    def __init__(self):
        # Conversation preamble; tells the model to answer only inside
        # <answer> </answer>.
        self.header = (
            "A conversation between User and Assistant. The user asks a question, and the Assistant solves it.\n"
            "The Assistant must return ONLY the final JSON array inside <answer> </answer>.\n\n"
        )

    def generate_math_prompt(self, user_payload: str, rules_text: str) -> str:
        """Assemble the full prompt via plain concatenation (str.format would
        raise KeyError on the many literal braces in the JSON payload)."""
        segments = [
            self.header,
            FEW_SHOT, "\n",
            "User: ", user_payload, "\n",
            "Assistant: <think>\n",
            rules_text, "\n",
            "</think>\n",
            "<answer>\n",
            # Nudges the model to start the JSON array right away.
            "You MUST begin with '[' and end with ']'.\n",
        ]
        return "".join(segments)
# ---- stop at </answer> ----
class StopOnSubstrings(StoppingCriteria):
    """Stops generation once the sequence ends with any stop string,
    compared in token-id space (each stop string is pre-tokenized once)."""

    def __init__(self, tokenizer, stop_strs):
        self.tokenizer = tokenizer
        # Token-id sequences to match against the tail of the generated ids.
        self.stop_ids = [tokenizer.encode(s, add_special_tokens=False) for s in stop_strs]

    def __call__(self, input_ids, scores, **kwargs):
        # Only the first batch row is inspected (generation here is batch size 1).
        tail = input_ids[0].tolist()
        return any(
            len(tail) >= len(ids) and tail[-len(ids):] == ids
            for ids in self.stop_ids
        )
if __name__ == "__main__":
    template = PromptTemplate()

    # ---- Your two-JSON-block user message ----
    # First block: the math problem itself.
    question_json = {
        "question": "Jaeyeol, Gicheol, and Minjeong each prepare a distinct gift. The three gifts are randomly assigned so that each person receives exactly one gift. Find the probability that at least one person receives the gift they prepared."
    }
    # Second block: the student's raw partial steps, consumed by the
    # next_step policy in RULES_EN.
    student_steps_json = {
        "steps": [
            {"index": 1, "expression": "Total outcomes: 3! = 6."},
            {"index": 2, "expression": "Derangements for 3 items D3 = 2 (tentative)."}
        ]
    }
    # Join the two blocks with '---', mirroring the FEW_SHOT example.
    user_message = json.dumps(question_json, ensure_ascii=False) + "\n---\n" + json.dumps(student_steps_json, ensure_ascii=False)
    prompt = template.generate_math_prompt(user_message, RULES_EN)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # Halt decoding as soon as the model closes the answer tag.
    stopper = StopOnSubstrings(tokenizer, stop_strs=["</answer>"])
    outputs = model.generate(
        **inputs,
        max_new_tokens=800,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
        repetition_penalty=1.1,
        use_cache=True,
        stopping_criteria=StoppingCriteriaList([stopper])
    )

    # BUG FIX: for decoder-only models, generate() returns prompt + continuation,
    # and the prompt contains a complete <answer>...</answer> pair (FEW_SHOT).
    # The previous re.search over the full decode therefore always matched the
    # few-shot example's answer, never the model's.  Decode only the newly
    # generated tokens instead.
    prompt_token_count = inputs["input_ids"].shape[1]
    generated = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

    # 1) Preferred: the prompt already opened <answer>, so the continuation
    #    should end with the JSON array followed by </answer>.
    m = re.search(r"(.*?)</answer>", generated, flags=re.DOTALL)
    candidate = m.group(1) if m else generated
    # 2) Extract the LAST JSON array from the candidate (guards against any
    #    stray preamble the model emits before the array).
    arrays = re.findall(r"\[\s*\{.*?\}\s*\]", candidate, flags=re.DOTALL)
    answer_payload = arrays[-1].strip() if arrays else "[]"
    print(answer_payload)

    # cleanup: drop references and release cached GPU memory
    del model, tokenizer, inputs, outputs
    if device == "cuda":
        torch.cuda.empty_cache()
    gc.collect()