diff --git a/scripts/08_generate_qa_llm.py b/scripts/08_generate_qa_llm.py index 1365ea4..57105c6 100644 --- a/scripts/08_generate_qa_llm.py +++ b/scripts/08_generate_qa_llm.py @@ -52,7 +52,8 @@ # primed by the Q1: prefix. The raw output then starts with the Q1 question # text (without "Q1:" itself), so _parse_qa prepends "Q1:" before matching. PROMPT_DE = """\ -Text: Der Eiffelturm ist ein aus Eisen erbauter Gitterturm auf dem Champ de Mars in Paris. Er wurde zwischen 1887 und 1889 errichtet und ist 330 Meter hoch. +Text: Der Eiffelturm ist ein aus Eisen erbauter Gitterturm auf dem Champ de Mars in Paris. +Er wurde zwischen 1887 und 1889 errichtet und ist 330 Meter hoch. Q1: Wo steht der Eiffelturm? A1: Der Eiffelturm steht auf dem Champ de Mars in Paris. @@ -66,7 +67,8 @@ Q1:""" PROMPT_EN = """\ -Text: The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It was built between 1887 and 1889 and stands 330 metres tall. +Text: The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. +It was built between 1887 and 1889 and stands 330 metres tall. Q1: Where is the Eiffel Tower located? A1: The Eiffel Tower is located on the Champ de Mars in Paris, France. @@ -79,6 +81,7 @@ Q1:""" + # Simple heuristic: use German prompt for 'de' docs, English for everything else def _pick_prompt(lang: str, text: str, n: int) -> str: excerpt = text[:2000].strip() @@ -123,6 +126,7 @@ def _parse_qa(raw: str, source: str, lang: str) -> list[dict[str, str]]: # ── Main ────────────────────────────────────────────────────────────────────── + def main() -> None: parser = argparse.ArgumentParser( description="Generate LLM-based Q&A pairs from corpus.jsonl using vLLM" diff --git a/scripts/patch_vllm_verneed_hash.py b/scripts/patch_vllm_verneed_hash.py index c6f578b..f97379c 100644 --- a/scripts/patch_vllm_verneed_hash.py +++ b/scripts/patch_vllm_verneed_hash.py @@ -12,8 +12,8 @@ """ import struct -import sys from pathlib import Path + from elftools.elf.elffile import ELFFile TARGET_VERSION = b"libcudart.so.13\x00" @@ -44,7 +44,7 @@ def patch_file(path: Path) -> int: """Return number of vna_hash fields patched.""" data = bytearray(path.read_bytes()) - with open(path, "rb") as f: + with path.open("rb") as f: elf = ELFFile(f) section = elf.get_section_by_name(".gnu.version_r") if section is None: @@ -62,13 +62,13 @@ def patch_file(path: Path) -> int: patched = 0 vn_off = 0 while vn_off < sec_size: - vn_version, vn_cnt, vn_file, vn_aux, vn_next = struct.unpack_from( + _vn_version, vn_cnt, _vn_file, vn_aux, vn_next = struct.unpack_from( " int: patched += 1 print(f" → patched to 0x{NEW_HASH:08x}") elif current == NEW_HASH: - print(f" → already correct, skipping") + print(" → already correct, skipping") else: print(f" → unexpected hash 0x{current:08x}, skipping") if vna_next == 0: