Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions scripts/08_generate_qa_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
# primed by the Q1: prefix. The raw output then starts with the Q1 question
# text (without "Q1:" itself), so _parse_qa prepends "Q1:" before matching.
PROMPT_DE = """\
Text: Der Eiffelturm ist ein aus Eisen erbauter Gitterturm auf dem Champ de Mars in Paris. Er wurde zwischen 1887 und 1889 errichtet und ist 330 Meter hoch.
Text: Der Eiffelturm ist ein aus Eisen erbauter Gitterturm auf dem Champ de Mars in Paris.
Er wurde zwischen 1887 und 1889 errichtet und ist 330 Meter hoch.

Q1: Wo steht der Eiffelturm?
A1: Der Eiffelturm steht auf dem Champ de Mars in Paris.
Expand All @@ -66,7 +67,8 @@
Q1:"""

PROMPT_EN = """\
Text: The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It was built between 1887 and 1889 and stands 330 metres tall.
Text: The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France.
It was built between 1887 and 1889 and stands 330 metres tall.

Q1: Where is the Eiffel Tower located?
A1: The Eiffel Tower is located on the Champ de Mars in Paris, France.
Expand All @@ -79,6 +81,7 @@

Q1:"""


# Simple heuristic: use German prompt for 'de' docs, English for everything else
def _pick_prompt(lang: str, text: str, n: int) -> str:
excerpt = text[:2000].strip()
Expand Down Expand Up @@ -123,6 +126,7 @@ def _parse_qa(raw: str, source: str, lang: str) -> list[dict[str, str]]:

# ── Main ──────────────────────────────────────────────────────────────────────


def main() -> None:
parser = argparse.ArgumentParser(
description="Generate LLM-based Q&A pairs from corpus.jsonl using vLLM"
Expand Down
10 changes: 5 additions & 5 deletions scripts/patch_vllm_verneed_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
"""

import struct
import sys
from pathlib import Path

from elftools.elf.elffile import ELFFile

TARGET_VERSION = b"libcudart.so.13\x00"
Expand Down Expand Up @@ -44,7 +44,7 @@ def patch_file(path: Path) -> int:
"""Return number of vna_hash fields patched."""
data = bytearray(path.read_bytes())

with open(path, "rb") as f:
with path.open("rb") as f:
elf = ELFFile(f)
section = elf.get_section_by_name(".gnu.version_r")
if section is None:
Expand All @@ -62,13 +62,13 @@ def patch_file(path: Path) -> int:
patched = 0
vn_off = 0
while vn_off < sec_size:
vn_version, vn_cnt, vn_file, vn_aux, vn_next = struct.unpack_from(
_vn_version, vn_cnt, _vn_file, vn_aux, vn_next = struct.unpack_from(
"<HHIII", section.data(), vn_off
)
# Walk vernaux chain
va_off = vn_off + vn_aux
for _ in range(vn_cnt):
vna_hash, vna_flags, vna_other, vna_name, vna_next = struct.unpack_from(
_vna_hash, _vna_flags, _vna_other, vna_name, vna_next = struct.unpack_from(
"<IHHII", section.data(), va_off
)
# Check if this vernaux refers to 'libcudart.so.13'
Expand All @@ -85,7 +85,7 @@ def patch_file(path: Path) -> int:
patched += 1
print(f" → patched to 0x{NEW_HASH:08x}")
elif current == NEW_HASH:
print(f" → already correct, skipping")
print(" → already correct, skipping")
else:
print(f" → unexpected hash 0x{current:08x}, skipping")
if vna_next == 0:
Expand Down
Loading