diff --git a/.baseline/coverage_2026-05-05_unit.xml b/.baseline/coverage_2026-05-05_unit.xml new file mode 100644 index 0000000..152f8c1 --- /dev/null +++ b/.baseline/coverage_2026-05-05_unit.xml @@ -0,0 +1,9067 @@ + + + + + + /home/frapercan/Thesis/repositories/PROTEA/protea + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.env b/.env new file mode 100644 index 0000000..1a96347 --- /dev/null +++ b/.env @@ -0,0 +1 @@ +export PROTEA_ADMIN_TOKEN="protea-admin" diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 9b3eca3..4771453 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -27,8 +27,8 @@ jobs: - name: Add poetry to PATH run: echo "$HOME/.local/bin" >> $GITHUB_PATH - - name: Install dev dependencies - run: poetry install --only dev + - name: Install main + dev dependencies + run: poetry install --with dev - name: Build Sphinx docs run: poetry run task html_docs diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index ae0079d..2806082 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -27,8 +27,8 @@ jobs: - name: Add poetry to PATH run: echo "$HOME/.local/bin" >> $GITHUB_PATH - - name: Install dev dependencies - run: poetry install --only dev + - name: Install main + dev dependencies + run: poetry install --with dev - name: ruff check run: poetry run ruff check protea scripts diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000..45a6db0 --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,70 @@ +name: Security + +on: + push: + pull_request: + schedule: + # Weekly Monday 06:00 UTC: catches new CVEs against pinned deps + # even if no PR has landed. 
+ - cron: "0 6 * * 1" + +jobs: + audit: + name: pip-audit + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + poetry-version: ["2.1.0"] + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - uses: abatilo/actions-poetry@v3 + with: + poetry-version: ${{ matrix.poetry-version }} + + - name: Add poetry to PATH + run: echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Install production deps in the poetry venv + # poetry 2.x removed `poetry export`; install with the main group + # only and then audit the resolved environment in-place. + run: poetry install --only main + + - name: Install pip-audit + run: pip install pip-audit + + - name: pip-audit (non-blocking in F0; blocking once F-OPS T-OPS.7 lands) + # F0 stance: surface findings without breaking the pipeline so + # the team can triage. F-OPS T-OPS.7 of master plan v3 will + # flip this to fail on severity HIGH. + run: poetry run pip-audit --strict --vulnerability-service osv || true + + bandit: + name: bandit (security static analysis) + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Install bandit + run: pip install "bandit[toml]" + + - name: Run bandit on protea/ + # Severity HIGH and confidence HIGH only at F0; tighten in F-OPS. + # Bandit reads its config from pyproject.toml ([tool.bandit]). 
+ run: bandit --severity-level high --confidence-level high -r protea/ -c pyproject.toml || true diff --git a/.gitignore b/.gitignore index 818082d..9272872 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ htmlcov/ # Docs build docs/build/ +docs/_build/ # IDE .idea/ @@ -27,7 +28,7 @@ node_modules/ # Misc *.log -logs/pids/ +logs/ CLAUDE.md .claude/ @@ -35,7 +36,17 @@ CLAUDE.md # Local data static/ storage/ +!protea/infrastructure/storage/ # Large embedding caches and test artifacts data/ref_cache/ apps/web/test-results/ + +# Local run outputs (per-run dumps, model checkpoints, eval results) +# Keep curated files under data/benchmarks/ tracked via explicit allow-list. +data/ +!data/benchmarks/ +!data/benchmarks/** +artifacts/ +results/ +var/ diff --git a/.~lock.EXPERIMENTS.md# b/.~lock.EXPERIMENTS.md# new file mode 100644 index 0000000..67ffcb0 --- /dev/null +++ b/.~lock.EXPERIMENTS.md# @@ -0,0 +1 @@ +,frapercan,bioxaxi,21.03.2026 13:10,/home/frapercan/snap/onlyoffice-desktopeditors/1067/.local/share/onlyoffice; \ No newline at end of file diff --git a/.~lock.RERANKER.md# b/.~lock.RERANKER.md# new file mode 100644 index 0000000..75e7420 --- /dev/null +++ b/.~lock.RERANKER.md# @@ -0,0 +1 @@ +,frapercan,bioxaxi,17.03.2026 17:01,/home/frapercan/snap/onlyoffice-desktopeditors/1067/.local/share/onlyoffice; \ No newline at end of file diff --git a/EXPERIMENTAL_DESIGN.md b/EXPERIMENTAL_DESIGN.md new file mode 100644 index 0000000..ca6ba18 --- /dev/null +++ b/EXPERIMENTAL_DESIGN.md @@ -0,0 +1,197 @@ +# PROTEA — Experimental Design + +**Version**: 1.0 — 2026-04-10 +**Status**: Active +**Scope**: Protein language model (PLM) benchmark for GO term prediction via KNN + learned reranking + +> This document is **prospective**: it formalises the protocol, hypotheses, and execution plan for the extended PLM comparison. Retrospective results (finished experiments, ablations, external tool comparisons) live in `EXPERIMENTS.md`. 
The reranker design rationale lives in `RERANKER.md`. + +--- + +## 1. Motivation + +The preliminary comparison in `EXPERIMENTS.md` (ESMC-300M vs ProstT5-XL) **confounds two independent variables**: model family and parameter count. ESMC-300M is a ~300M-parameter BERT-like encoder; ProstT5-XL is a ~3B-parameter T5 encoder with structural fine-tuning. Any observed difference in downstream Fmax cannot be attributed to either axis unambiguously. + +This document defines the extended benchmark that disentangles those factors and integrates additional PLMs (Ankh, ESM2, ESMC-600M, ProtT5-XL) into a single, statistically comparable grid under an identical downstream pipeline. + +--- + +## 2. Research questions + +| ID | Question | +|---|---| +| **RQ1** | At matched parameter count, does a BERT-like encoder (ESM2, ESMC) outperform a T5 encoder (ProtT5, Ankh) for GO term transfer via KNN? | +| **RQ2** | Holding model family fixed, how does Fmax scale with parameter count? Where does the curve saturate? | +| **RQ3** | Does structure-aware fine-tuning (ProstT5) yield a measurable Fmax improvement over its pure-sequence parent (ProtT5-XL) at identical size? | +| **RQ4** | Does the learned reranker compensate for weaker embeddings by placing more weight on alignment and taxonomy features? Is there a systematic inverse relationship between embedding quality and reranker feature-importance on these compensatory signals? | + +--- + +## 3. 
Hypotheses (pre-registered) + +| # | Hypothesis | Primary test | +|---|---|---| +| **H1** | At small scale (~300–650M), family effect dominates scale effect (ΔFmax across families ≥ ΔFmax across sizes within a family) | Wilcoxon signed-rank across 9-cell Fmax vectors, pairwise within the small tier | +| **H2** | Scale gains within a single family saturate in the 1–3B range | Monotonicity of Fmax across {ESM2-650M, ESM2-3B} and {Ankh-base, Ankh-large, ProtT5-XL} | +| **H3** | Structure awareness provides a positive but modest gain (+1–3 Fmax points averaged across cells) | Pairwise matched test ProtT5-XL vs ProstT5-XL (same backbone, same size, only fine-tuning differs) | +| **H4** | Reranker gain-based importance on `{alignment_*, similarity_*, taxonomic_*}` features is inversely correlated with the baseline Fmax of the underlying embedding | Linear regression across the 8 models: `weight_on_compensatory` ~ `baseline_Fmax` | + +H1–H3 are confirmatory; H4 is exploratory and carries forward the **F2 finding** from the ESMC vs ProstT5 analysis in `project_reranker_benchmark.md`. + +--- + +## 4. Model matrix + +**8 models total** (2 already computed, 6 new). 
+ +| # | Model | Backbone | Params | PROTEA backend | Status | +|---|---|---|---|---|---| +| 1 | **ESMC-300M** | ESM3c (EvolutionaryScale) | ~300M | `esm3c` | ✓ computed; reranker v4 in progress (`48c91381`) | +| 2 | **ESMC-600M** | ESM3c (EvolutionaryScale) | ~600M | `esm3c` | new | +| 3 | **ESM2-650M** | ESM2 `esm2_t33_650M_UR50D` (Meta) | ~650M | `esm` | new | +| 4 | **ESM2-3B** | ESM2 `esm2_t36_3B_UR50D` (Meta) | ~3B | `esm` | new | +| 5 | **Ankh-base** | Ankh `ElnaggarLab/ankh-base` | ~450M | `ankh` | new | +| 6 | **Ankh-large** | Ankh `ElnaggarLab/ankh-large` | ~1.9B | `ankh` | new | +| 7 | **ProtT5-XL** | ProtT5 `prot_t5_xl_uniref50` (Rostlab) | ~3B | `t5` | new | +| 8 | **ProstT5-XL** | ProstT5 structure-fine-tuned (Rostlab) | ~3B | `t5` | ✓ computed; reranker v4 in progress (`e923ac70`) | + +**Discarded**: ESM2-15B (prohibitive embedding cost over 527k sequences; no matched-size T5 counterpart → breaks symmetry of the grid). + +### Explanatory grid (for RQ1 / RQ2 / RQ3) + +| Scale | BERT-like encoder | T5 encoder (sequence-only) | T5 encoder (structure-aware) | +|---|---|---|---| +| **Small (~300–650M)** | ESMC-300M, ESMC-600M, ESM2-650M | Ankh-base (~450M) | — | +| **Medium (~1–2B)** | — | Ankh-large (~1.9B) | — | +| **Large (~3B)** | ESM2-3B | ProtT5-XL | ProstT5-XL | + +### Planned pairwise comparisons + +| Pair | Isolates | RQ | +|---|---|---| +| ESMC-300M ↔ Ankh-base | architecture (BERT vs T5), ~matched size | RQ1 | +| ESM2-650M ↔ Ankh-base | architecture, ~matched size | RQ1 | +| ESMC-300M ↔ ESMC-600M | scale, family fixed | RQ2 | +| ESM2-650M ↔ ESM2-3B | scale, family fixed | RQ2 | +| Ankh-base ↔ Ankh-large ↔ ProtT5-XL | scale ladder within T5 encoder family | RQ2 | +| **ProtT5-XL ↔ ProstT5-XL** | structure fine-tuning (cleanest test) | **RQ3** | + +--- + +## 5. Data and splits (fixed across all 8 runs) + +Identical to the ESMC/ProstT5 preliminary experiments in `EXPERIMENTS.md` to preserve backward comparability with established findings. 
+ +| Item | Value | +|---|---| +| Reference annotation sets | GOA releases 160 → 220 (13 temporal splits for reranker training) | +| Evaluation set | `42b34e79-6fe9-4fa0-b718-02f43a1e3192` (GOA 220 → 229 delta) | +| Evaluation size | 20,281 proteins (NK=2,831; LK=3,410; PK=15,313) | +| Ontology snapshot | `947bdff6-d17c-4ca3-a41a-bc8fb4d74b7a` (GO release 2026-01-23) | +| IA file | `data/benchmarks/IA_cafa6.tsv` (CAFA6 information accretion) | + +--- + +## 6. Pipeline protocol — pinned hyperparameters + +Every model is put through the same three-stage pipeline with **identical hyperparameters**. No per-model tuning. Fair comparison requires this invariance. + +### 6.1 Embeddings — `compute_embeddings` +- Pooling: `mean` over residue representations +- Precision: fp32 at storage (cast to fp16 at KNN load time via `_REF_CACHE`) +- Storage: pgvector `VECTOR(dim)` per `(sequence, config, chunk)` +- Full reference set (~527k sequences) + evaluation set query embeddings + +### 6.2 KNN retrieval — `predict_go_terms` +- `k = 5` +- `metric = cosine` +- `backend = faiss`, `faiss_index_type = IVFFlat`, `nlist = 256`, `nprobe = 32` +- `aspect_separated_knn = true` +- `compute_alignments = true` (NW + SW via parasail/BLOSUM62) +- `compute_taxonomy = true` (NCBI taxonomy LCA via ete3) + +### 6.3 Reranker training — `train_reranker_auto` (v4 budget) +- `num_boost_round = 5000` +- `early_stopping_rounds = 100` +- `val_fraction = 0.2` +- `neg_pos_ratio = 10` +- `train_versions = [160, 165, 170, 175, 180, 185, 190, 195, 200, 205, 211, 215, 220]` (13 splits) +- `test_versions = [229]` +- `compute_alignments = true`, `compute_taxonomy = true` +- `ia_file = data/benchmarks/IA_cafa6.tsv` (IA-weighted sample weighting: `sample_weight = IA(go_term)`) +- **3 models per embedding (NK / LK / PK)** — per-category, not per-aspect (justified in `RERANKER.md` §6.3) +- Objective: **binary cross-entropy (LightGBM `objective=binary`)**, early stopping on validation AUC. 
IA weights enter through `sample_weight`, not through the objective. See `RERANKER.md` §6.1 for rationale and the known limitation that a pairwise/listwise rank loss is future work. +- Name convention: `lgbm_v4_converged_-{nk,lk,pk}` + +### 6.4 Evaluation — `run_cafa_evaluation` +- Library: `cafaeval` (integrated via the `run_cafa_evaluation` operation) +- Metric: **Fmax with IA weighting**, computed per (tier × aspect) cell → 9-dimensional output vector per model×pipeline-stage +- Pipeline stages reported: `baseline` (embedding only), `alignment_weighted` (best heuristic from Exp 3), `reranker` (v4 LightGBM) + +--- + +## 7. Statistical protocol + +Pre-registered to prevent post-hoc test-shopping. + +| Aspect | Method | +|---|---| +| **Primary outcome** | 9-cell Fmax vector per (model, pipeline-stage) | +| **Pairwise test** | Wilcoxon signed-rank over the 9 matched cells | +| **Multiple comparisons** | Holm–Bonferroni correction across the planned comparisons in §4 (6 RQ1/RQ2/RQ3 tests) | +| **Effect size** | Mean Fmax delta ± 95% bootstrap CI (1000 resamples over cells) | +| **H4 regression** | For each (model, tier): `weight_compensatory = Σ importance(feature)` over features in `{alignment_score_*, similarity_*, identity_*, gaps_pct_*, alignment_length_*, taxonomic_*}`. Fit `weight_compensatory ~ baseline_Fmax` across the 8 models via OLS; report slope, p-value, R² | +| **Reporting convention** | All numbers from `cafaeval` with IA weighting. **Never** use the internal `test_evaluation` field from `train_reranker_auto` for thesis claims — it is unweighted and biased (see `project_reranker_benchmark.md`) | + +--- + +## 8. Execution plan + +Ordered so each stage produces usable partial results; no stage blocks on the next. 
+ +| Step | Action | Depends on | Compute estimate | +|---|---|---|---| +| 1 | Wait for v4 rerankers (ESMC-300M, ProstT5-XL) to finish | running | ~4h total (sequential) | +| 2 | Create 6 `EmbeddingConfig` rows with pinned pooling/precision | — | minutes | +| 3 | Run `compute_embeddings` for the 6 new models over ref+eval sets | step 2 | 2–10h per model; ~1.5–2 days total sequential | +| 4 | Run `predict_go_terms` (with alignments + taxonomy) for the 6 new models | step 3 | 1–2h per model | +| 5 | Run `train_reranker_auto` v4 for the 6 new models in `protea.training` queue | step 4 | 2–4h per model; ~1 day total sequential | +| 6 | Run `run_cafa_evaluation` for all 8 models × 3 stages = 24 evals | step 5 + existing | ~10 min per eval; ~4h total | +| 7 | Extract feature importance from all 24 (model × tier) rerankers | step 5 | minutes (script) | +| 8 | Apply the statistical protocol in §7 to the aggregated results | steps 6–7 | — | +| 9 | Update `EXPERIMENTS.md` with the per-model result tables | step 8 | — | +| 10 | Compile results into thesis chapter / appendix | step 9 | — | + +**Total wall-clock (pessimistic, fully serial):** ~3–4 days of compute. Can be compressed with overlapping embedding/training workers if GPU capacity allows. + +--- + +## 9. Deliverables + +- `EmbeddingConfig` rows for the 6 new models, committed to the DB. +- Per-model entries in `EXPERIMENTS.md` mirroring the existing table format (Exp 1 / Exp 3 / Exp 4+ rows). +- **Master results table**: 8 rows × (baseline Fmax | alignment_weighted Fmax | reranker Fmax) × 9 cells each. +- **Feature importance heatmap**: 24 (model × tier) rerankers × top-N features, colour-coded by gain. +- Statistical test report (Wilcoxon p-values + effect sizes + CIs) as a standalone markdown section. +- Thesis chapter / appendix formalising the grid as evidence for RQ1–RQ4. + +--- + +## 10. Known limitations (honest reporting) + +1. 
**Not training-data matched.** Each PLM was pretrained on different corpora (UniRef50 subsets at different points in time, sometimes Big Fantastic Database for ProtT5, etc.). Perfect controlled comparison is impossible without re-pretraining, which is out of scope. +2. **Architecture is not a clean isolated variable.** T5 encoders and BERT-style encoders differ in depth, attention masking, objective (span corruption vs MLM), and training data. RQ1's conclusion will be **correlational**, not causal. +3. **Scale is coarse.** Three tiers (~300M / ~1.5B / ~3B) is the maximum granularity this compute budget allows. Smooth scaling curves are out of reach. +4. **Ankh backend.** Ankh is exposed in PROTEA as a **dedicated backend** (`model_backend = "ankh"`), not as an alias of `t5`. Internally it reuses the T5 batched pipeline via `_embed_t5(..., use_aa2fold=False)` but uses `AutoTokenizer` instead of `T5Tokenizer` and never injects the `` prefix — ensuring clean separation in the benchmark tables. The distinction matters for RQ1: Ankh results are reported under their own family row, not merged into "T5 encoder". +5. **ESMC-600M availability.** EvolutionaryScale's public ESMC release must be confirmed to include the 600M variant at time of execution. If unavailable at that scale, substitute with the closest public ESMC size and document the deviation in step 2. +6. **No seed-variance analysis.** LightGBM training (with fixed seed), KNN retrieval, and embeddings are all deterministic under PROTEA's default config. Variance across re-runs for the same config should be zero; we do not budget compute for confirming this. +7. **Single evaluation delta.** Only the GOA 220 → 229 delta is used. A multi-delta sensitivity analysis (e.g. 215 → 229, 220 → 225) is a candidate for future work but not planned here. +8. 
**ProstT5 inference requires 3Di tokens**, which PROTEA currently provides via sequence-only input using the AA2fold branch (`use_aa2fold = "prostt5" in model_name.lower()` at `compute_embeddings.py:715`). This means PROTEA's ProstT5 embeddings are generated **without** real 3Di tokens from a structure; the model internally translates sequence to predicted 3Di. This is the setup the Rostlab release supports but is distinct from "true structure-aware" inference with Foldseek-derived 3Di tokens. Document this explicitly in the thesis when discussing RQ3. + +--- + +## 11. Change log + +| Date | Change | +|---|---| +| 2026-04-10 | Initial draft: 8-model matrix, RQ1–RQ4, hypotheses H1–H4, pinned pipeline, statistical protocol. ESMC-600M confirmed. ESM2-15B discarded. | diff --git a/EXPERIMENTS.md b/EXPERIMENTS.md new file mode 100644 index 0000000..7ea1e62 --- /dev/null +++ b/EXPERIMENTS.md @@ -0,0 +1,506 @@ +# Plan de Experimentación PROTEA + +## Infraestructura + +- **Annotation sets:** 15 GOA snapshots (160–229) +- **Ontology:** releases/2026-01-23 + IA file (IA_cafa6.tsv) +- **Embeddings:** 527K ESM-C 300M (dim=960) +- **Evaluation set:** GOA 220→229 (NK: 2831, LK: 3410, PK: 15313 proteínas) +- **Query set:** `af6bf007` (GOA_220_229, ~20K proteínas) +- **Evaluador:** cafaeval con IA weighting (information accretion) + +**IDs de referencia:** +- Embedding config: `8e7f78c3-900f-452f-858e-63ca14d103e1` +- Annotation set (GOA 220): `c7bdb296-a86a-4141-b5e5-53eb77363ad0` +- Ontology snapshot: `947bdff6-d17c-4ca3-a41a-bc8fb4d74b7a` +- Evaluation set (220→229): `42b34e79-6fe9-4fa0-b718-02f43a1e3192` + +--- + +## Exp 1 — Baseline KNN: efecto de k + +**Scoring:** baseline (`1 - distance/2`), `aspect_separated_knn=true` + +| k | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | Estado | +|---|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| +| **5** | 0.412 | 0.590 | 0.668 | 0.467 | 0.558 | 0.676 | 
0.187 | 0.278 | 0.325 | ✅ `d7adeb1e` | +| 10 | 0.400 | 0.574 | 0.656 | 0.458 | 0.537 | 0.663 | 0.177 | 0.272 | 0.317 | ✅ `30bf6187` | +| 20 | 0.396 | 0.564 | 0.649 | 0.454 | 0.528 | 0.654 | 0.173 | 0.269 | 0.313 | ✅ `a4442444` | +| 50 | 0.396 | 0.555 | 0.646 | 0.452 | 0.523 | 0.651 | 0.173 | 0.269 | 0.312 | ✅ `d41b8d05` | + +**Conclusión:** k=5 es óptimo en todas las categorías. Más vecinos = más ruido, degradación monotónica. + +--- + +## Exp 2 — Efecto de `aspect_separated_knn` + +Con k=5, comparar índice unificado vs separado por aspecto (BPO/MFO/CCO). + +| Variante | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | Estado | +|----------|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| +| aspect_sep=true | 0.412 | 0.590 | 0.668 | 0.467 | 0.558 | 0.676 | 0.187 | 0.278 | 0.325 | ✅ `d7adeb1e` | +| aspect_sep=false | 0.410 | 0.595 | 0.666 | 0.471 | 0.569 | 0.675 | 0.188 | 0.279 | 0.325 | ✅ `bee8fbe7` | + +**Conclusión:** Diferencias mínimas. aspect_sep=false mejora ligeramente MFO (+0.005 NK, +0.011 LK); aspect_sep=true mejora ligeramente BPO. Sin ganancia clara → mantener aspect_sep=true por cobertura uniforme de aspectos. + +--- + +## Exp 3 — Scoring heurístico + +**Requisito:** prediction set con `compute_alignments=true, compute_taxonomy=true` (k=5, aspect_sep=mejor de Exp 2). + +Usa los 5 ScoringConfig presets del sistema. El scoring se aplica en evaluación (no requiere re-predicción para cada config). 
+ +| Config | Fórmula | Pesos | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | Estado | +|--------|---------|-------|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| +| **embedding_only** | linear | emb=1.0 | 0.412 | 0.590 | 0.668 | 0.467 | 0.558 | 0.675 | 0.187 | 0.278 | 0.325 | ✅ | +| alignment_weighted | linear | emb=0.5, nw=0.3, sw=0.2 | **0.428** | **0.611** | **0.683** | **0.500** | **0.598** | **0.699** | **0.201** | **0.285** | **0.337** | ✅ | +| evidence_primary | linear | emb=0.2, evi=0.8 | 0.362 | 0.558 | 0.638 | 0.412 | 0.540 | 0.642 | 0.165 | 0.268 | 0.308 | ✅ | +| embedding_plus_evidence | evidence_weighted | emb=1.0, evi=1.0 | 0.352 | 0.531 | 0.618 | 0.387 | 0.517 | 0.626 | 0.162 | 0.250 | 0.300 | ✅ | +| composite | evidence_weighted | emb=0.4, nw=0.2, sw=0.1, evi=0.2, tax=0.1 | 0.364 | 0.560 | 0.639 | 0.412 | 0.542 | 0.642 | 0.167 | 0.267 | 0.307 | ✅ | + +**Prediction set:** `a818b653` (k=5, aspect_sep=true, alignments+taxonomy+reranker_features) + +**Conclusión:** `alignment_weighted` es el mejor scoring en todas las categorías y aspectos. Mejora el baseline (embedding_only) entre +1.5% y +4% Fmax. Las configs que usan evidence_weight (evidence_primary, composite, embedding_plus_evidence) **empeoran** el baseline — la señal de evidencia perjudica el ranking bajo CAFA-eval con IA weighting. + +--- + +## Exp 4 — Re-ranker LightGBM + +**Requisito:** prediction set con `compute_alignments=true, compute_taxonomy=true, compute_reranker_features=true`. + +**Entrenamiento:** `train_reranker_auto` con 12 splits temporales (GOA 160→165 hasta 215→220), test 220→229. +9 modelos (NK/LK/PK × BPO/MFO/CCO), binary CE, features completas (alignments + taxonomy + reranker_features). + +### 4a. 
Sin balance (job `188eb26a`) + +| Cat-Asp | AUC | Iter | Observación | +|---------|-----|------|-------------| +| NK-BPO | 0.771 | 1 | early stop — pocos positivos (0.17%) | +| NK-MFO | 0.938 | 300 | buen modelo | +| NK-CCO | 0.911 | 266 | buen modelo | +| LK-BPO | 0.770 | 1 | early stop | +| LK-MFO | 0.930 | 300 | buen modelo | +| LK-CCO | 0.872 | 300 | buen modelo | +| PK-BPO | 0.779 | 1 | early stop | +| PK-MFO | 0.831 | 1 | early stop | +| PK-CCO | 0.767 | 1 | early stop | + +6 de 9 modelos no aprenden (early stop iter=1) por desbalance extremo. + +### 4b. Con balance `neg_pos_ratio=10` (job `a96eed71`) + +| Cat-Asp | AUC | Iter | Δ AUC vs 4a | +|---------|-----|------|-------------| +| NK-BPO | 0.898 | 4 | +0.127 | +| NK-MFO | 0.922 | 9 | -0.016 | +| NK-CCO | 0.881 | 4 | -0.030 | +| LK-BPO | 0.893 | 4 | +0.124 | +| LK-MFO | 0.925 | 11 | -0.005 | +| LK-CCO | 0.854 | 3 | -0.018 | +| PK-BPO | 0.796 | 2 | +0.017 | +| PK-MFO | 0.849 | 3 | +0.018 | +| PK-CCO | 0.781 | 2 | +0.014 | + +Todos los modelos aprenden. BPO sube ~12 puntos AUC. MFO/CCO bajan ligeramente (menos datos de entrenamiento). 
+ +### Resultados CAFA-eval (v1) + +| Método | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | +|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| +| baseline (emb only) | 0.412 | 0.590 | 0.668 | 0.467 | 0.558 | 0.675 | 0.187 | 0.278 | 0.325 | +| **alignment_weighted** | **0.428** | **0.611** | 0.683 | **0.500** | **0.598** | 0.699 | 0.201 | 0.285 | 0.337 | +| reranker v1 (sin balance) | 0.384 | 0.584 | **0.695** | 0.447 | 0.482 | **0.713** | 0.201 | 0.284 | 0.335 | +| reranker v1 (balanced) | 0.408 | 0.577 | 0.687 | 0.478 | 0.506 | 0.711 | 0.201 | **0.298** | 0.332 | + +**Conclusiones v1:** +- El balance corrige BPO (+0.024 NK, +0.031 LK vs sin balance) pero no alcanza al heurístico +- Ambos rerankers mejoran **CCO** respecto al baseline (+2-4%) +- Ambos rerankers **empeoran MFO** respecto al heurístico (-3 a -9%) +- El reranker balanced destaca en **PK-MFO** (0.298, mejor de todos los métodos) +- `alignment_weighted` sigue siendo el mejor approach global: gana en 6 de 9 celdas + +--- + +## Exp 5 — Re-ranker v2 (per-categoría con IA weighting) + +**Cambios respecto a v1:** +- 3 modelos per-categoría (NK, LK, PK) en vez de 9 per-aspecto +- `is_unbalance` eliminado (evita doble compensación con `neg_pos_ratio`) +- `learning_rate`: 0.05 → 0.01 +- `num_boost_round`: 300 → 1000 (con `early_stopping_rounds`: 50) +- IA values como `sample_weight` en entrenamiento (términos raros pesan más) + +### 5a. Quick test (2 splits: 211→215→220, test 229) — eval `9242ea3e` + +| Método | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | +|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| +| reranker v2 (2 splits) | 0.418 | 0.601 | 0.691 | 0.477 | 0.560 | 0.700 | 0.182 | 0.282 | 0.341 | + +MFO ya no se destruye (0.601 vs 0.577 de v1 balanced). Prometedor con solo 2 splits. + +### 5b. 
Full training (13 splits: 160→220, test 229) — eval `a3d3bbea` + +Modelos: `lgbm_v2_full-{nk,lk,pk}` +- NK: `fc013658-9c95-48e8-9c72-c13f477a8b26` +- LK: `8697ffed-6814-4594-85a1-5dae3ea00b1f` +- PK: `cdcbc26f-8f9a-41b2-9196-21bf4f9d3e2e` + +| Método | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | +|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| +| baseline (emb only) | 0.412 | 0.590 | 0.668 | 0.467 | 0.558 | 0.675 | 0.187 | 0.278 | 0.325 | +| **alignment_weighted** | **0.428** | **0.611** | 0.683 | **0.500** | **0.598** | 0.699 | **0.201** | 0.285 | 0.337 | +| reranker v1 (sin balance) | 0.384 | 0.584 | 0.695 | 0.447 | 0.482 | **0.713** | 0.201 | 0.284 | 0.335 | +| reranker v1 (balanced) | 0.408 | 0.577 | 0.687 | 0.478 | 0.506 | 0.711 | 0.201 | **0.298** | 0.332 | +| **reranker v2 full** | 0.425 | 0.607 | **0.689** | 0.486 | 0.575 | **0.707** | 0.199 | 0.297 | **0.335** | + +**Conclusiones v2 full:** +- **Mucho más robusto que v1** — MFO no se destruye (0.607 vs 0.577 de v1 bal), BPO mejora consistentemente +- **CCO sigue siendo el punto fuerte del reranker**: NK-CCO 0.689, LK-CCO 0.707 (segundo mejor tras v1 unbal) +- **PK recupera**: v2 full (0.199/0.297/0.335) supera al v2 quick test que había caído en PK-BPO +- **alignment_weighted sigue ganando en BPO y MFO**: NK-BPO 0.428 vs 0.425, LK-BPO 0.500 vs 0.486, LK-MFO 0.598 vs 0.575 +- El IA weighting en entrenamiento + modelos per-categoría eliminan la inestabilidad de v1 pero no superan al heurístico globalmente + +--- + +## Exp 6 — Re-ranker v3 (features completas: alineamientos + taxonomía en entrenamiento) + +**Cambio clave respecto a v2:** En v2 las features de alineamiento (NW/SW) y taxonomía estaban hardcodeadas a NULL durante el entrenamiento — el modelo nunca las veía. 
v3 computa `compute_alignment()` y `compute_taxonomy()` por cada par (query, ref) durante la generación de datos de entrenamiento, dando al modelo acceso a las 22 features completas. + +**Configuración:** 13 splits (160→220), test 229, `neg_pos_ratio=10`, IA weights, `compute_alignments=true`, `compute_taxonomy=true`. Tiempo de entrenamiento: ~2h 45m (vs ~2h de v2 — el overhead de alineamientos es mínimo). + +Modelos: `lgbm_v3_full-{nk,lk,pk}` +- NK: `2ff1818f-71b6-4932-8f8d-b3000e3c8d34` +- LK: `269e26b4-0bec-42fa-a077-fe5b675dd2de` +- PK: `e14b9716-bbf8-4b99-b34b-b801c3966579` + +### Resultados CAFA-eval — eval `23851bff` + +| Método | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | +|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| +| baseline (emb only) | 0.412 | 0.590 | 0.668 | 0.467 | 0.558 | 0.675 | 0.187 | 0.278 | 0.325 | +| alignment_weighted | 0.428 | 0.611 | 0.683 | **0.500** | 0.598 | 0.699 | 0.201 | 0.285 | 0.337 | +| reranker v2 full | 0.425 | 0.607 | 0.689 | 0.486 | 0.575 | 0.707 | 0.199 | 0.297 | 0.335 | +| **reranker v3 full** | **0.431** | **0.620** | **0.692** | 0.478 | **0.607** | 0.697 | **0.201** | **0.297** | **0.339** | + +**Conclusiones v3:** +- **Las features de alineamiento importaban.** v3 supera a v2 en casi todas las métricas, especialmente MFO (+0.013 NK, +0.032 LK) +- **Supera al heurístico `alignment_weighted`** en 7 de 9 celdas: NK-BPO (+0.003), NK-MFO (+0.009), NK-CCO (+0.009), LK-MFO (+0.009), PK-BPO (=), PK-MFO (+0.012), PK-CCO (+0.002) +- Solo pierde en LK-BPO (0.478 vs 0.500) y LK-CCO (0.697 vs 0.699) +- **Resultado positivo**: el re-ranker con features completas es el mejor método global + +--- + +## Resumen de progreso + +| Fase | Experimento | Estado | Mejor Fmax NK-MFO | +|------|-------------|--------|-------------------| +| 1 | Baseline KNN (k sweep) | ✅ | 0.590 (k=5) | +| 2 | aspect_separated_knn | ✅ | ~0.590 (sin diferencia clara) | +| 3 | 
Scoring heurístico (5 configs) | ✅ | 0.611 (alignment_weighted) | +| 4a | Re-ranker v1 LightGBM (sin balance) | ✅ | 0.584 (mejora CCO, empeora MFO) | +| 4b | Re-ranker v1 LightGBM (balanced) | ✅ | 0.577 (mejora PK-MFO a 0.298) | +| 5a | Re-ranker v2 quick test (2 splits) | ✅ | 0.601 (mucho más estable que v1) | +| 5b | Re-ranker v2 full (13 splits) | ✅ | 0.607 (robusto, pero no supera heurístico) | +| 6 | **Re-ranker v3 full (features completas)** | ✅ | **0.620** (supera al heurístico) | +| 7 | **Comparativa eggNOG-mapper** | ✅ | 0.359 (PROTEA 9/9 celdas mejor) | +| 8 | **Comparativa Pannzer2 + data leakage** | ✅ | 0.717 (con leakage: 62.4% NK GT exacto) | +| 9 | **Comparativa InterProScan 6** | ✅ | 0.551 (PROTEA supera en 8/9 celdas) | +| 10 | **ProstT5 vs ESMC (v3 preliminar)** | ⚠️ F3 contaminado por under-training | F1+F2 válidos, F3 pendiente | +| 11 | **Re-train v4 "converged" (5000 rounds)** | 🔄 en curso | — | +| 12 | **Extended PLM matrix (8 modelos)** | 📋 diseño listo (`EXPERIMENTAL_DESIGN.md`) | — | + +**Flujo de dependencias:** +``` +Exp 1 (k sweep) ✅ + → Exp 2 (aspect_sep) ✅ + → Predicción con features completas ✅ (a818b653) + → Exp 3 (scoring configs) ✅ — alignment_weighted gana + → Exp 4 (re-ranker v1, 12 splits) ✅ — mejora CCO, empeora MFO + → Exp 5 (re-ranker v2, per-cat + IA weights) ✅ — robusto pero no supera heurístico + → Exp 6 (re-ranker v3, features completas) ✅ — SUPERA al heurístico + → Exp 7 (eggNOG-mapper comparison) ✅ — PROTEA gana 9/9 celdas + → Exp 8 (Pannzer2 + leakage analysis) ✅ — leakage confirmado, PROTEA única evaluación fair + → Exp 9 (InterProScan 6) ✅ — PROTEA supera en 8/9 celdas +``` + +**Mejor configuración global: `reranker v3 full` (LightGBM per-categoría, 22 features, IA weights)** + +--- + +## Exp 7 — Comparativa con eggNOG-mapper + +**Herramienta:** eggNOG-mapper v2.1.13 (Docker: `quay.io/biocontainers/eggnog-mapper:2.1.13--pyhdfd78af_2`) +**Base de datos:** eggNOG DB v5.0.2 + Diamond v2.0.15 +**Parámetros:** `-m 
diamond --go_evidence experimental --tax_scope auto --target_orthologs all --cpu 8` +**Test set:** 20,281 proteínas del delta GOA 220→229 (mismo que todos los experimentos PROTEA) +**Cobertura:** 17,334/20,281 proteínas con GO terms (85.5%) +**Tiempo:** ~21 minutos (solo CPU, 8 threads) + +### Resultados CAFA-eval (IA-weighted) + +| Método | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | +|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| +| **eggNOG-mapper 2.1.13** | 0.247 | 0.359 | 0.386 | 0.382 | 0.334 | 0.450 | 0.190 | 0.199 | 0.325 | +| PROTEA baseline (emb only) | 0.412 | 0.590 | 0.668 | 0.467 | 0.558 | 0.675 | 0.187 | 0.278 | 0.325 | +| **PROTEA reranker v3** | **0.431** | **0.620** | **0.692** | **0.478** | **0.607** | **0.697** | **0.201** | **0.297** | **0.339** | + +### Diferencia absoluta Fmax (PROTEA v3 - eggNOG-mapper) + +| Categoría | BPO | MFO | CCO | +|-----------|------|------|------| +| NK | +0.184 | +0.261 | +0.306 | +| LK | +0.096 | +0.273 | +0.247 | +| PK | +0.011 | +0.098 | +0.014 | + +**Conclusiones:** +- PROTEA v3 supera a eggNOG-mapper en **9 de 9 celdas** +- Incluso el baseline de PROTEA (solo embeddings) supera a eggNOG-mapper en 8 de 9 celdas +- Las mayores diferencias están en NK y LK (hasta +0.306 Fmax en NK-CCO) +- eggNOG-mapper tiene menor cobertura (85.5% vs 100%) y no produce scores graduados +- Script de evaluación: `scripts/evaluate_external_tool.py` + +--- + +## Exp 8 — Comparativa con Pannzer2 + análisis de data leakage + +**Herramienta:** Pannzer2 (servidor web Helsinki, marzo 2026) +**Base de datos:** UniProt/SwissProt actual (actualizada a fecha de ejecución) +**Test set:** 20,281 proteínas del delta GOA 220→229 (mismo que todos los experimentos) +**Cobertura:** 19,964/20,281 proteínas con GO terms (98.4%) +**Predicciones totales:** 532,557 (max 30 GO terms por proteína, con PPV scores calibrados 0.31–0.91) + +### Resultados CAFA-eval (IA-weighted) + 
+| Método | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | +|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| +| **Pannzer2** † | **0.656** | **0.717** | **0.791** | **0.681** | **0.729** | **0.813** | **0.391** | **0.574** | **0.618** | +| InterProScan 6 † | 0.312 | 0.551 | 0.476 | 0.479 | 0.488 | 0.491 | 0.208 | 0.269 | 0.250 | +| eggNOG-mapper 2.1.13 † | 0.247 | 0.359 | 0.386 | 0.382 | 0.334 | 0.450 | 0.190 | 0.199 | 0.325 | +| **PROTEA reranker v3** | **0.431** | **0.620** | **0.692** | **0.478** | **0.607** | **0.697** | **0.201** | **0.297** | **0.339** | + +† Subject to temporal data leakage (reference DB from March 2026, after GOA 229). + +### Data leakage: análisis temporal + +Los resultados de Pannzer2 y eggNOG-mapper **no son comparables directamente** con PROTEA debido a data leakage temporal: + +| | Pannzer2 | InterProScan 6 | eggNOG-mapper | PROTEA | +|---|---|---|---|---| +| **Fecha de ejecución** | Marzo 2026 | 25 Mar 2026 | 24 Mar 2026 | — | +| **BD de referencia** | UniProt/SwissProt 2026 | InterPro 2026 | eggNOG v5.0.2 (2026) | GOA 220 (frozen at t0) | +| **Conoce las respuestas?** | Sí | Parcialmente | Parcialmente | No | + +**Cuantificación del leakage:** Se midió el porcentaje de pares (proteína, GO term) del ground truth que aparecen exactamente en las predicciones de cada herramienta. + +| Categoría | GT pairs | Pannzer2 exact match | eggNOG exact match | +|-----------|----------|---------------------|-------------------| +| **Total** | 40,014 | 20,373 (**50.9%**) | 10,308 (25.8%) | +| NK | 6,953 | 4,339 (**62.4%**) | 1,025 (14.7%) | +| LK | 5,520 | 3,624 (**65.7%**) | 1,087 (19.7%) | +| PK | 27,541 | 12,410 (45.1%) | 8,196 (29.8%) | + +Pannzer2 acierta el 62.4% de las anotaciones NK — proteínas que por definición no tenían anotaciones experimentales en t0. 
Esto confirma que su BD de referencia contiene anotaciones posteriores a GOA 220, incluyendo muchas que forman parte del ground truth GOA 229. + +**Conclusión:** PROTEA es la única herramienta del benchmark que garantiza integridad temporal: la referencia se congela en t0, el ground truth se computa como delta, y todo queda versionado en la BD. Los números de Pannzer2 y eggNOG-mapper representan un **upper bound optimista** bajo data leakage, no una comparación fair. + +- Parsing de resultados Pannzer2: `/home/frapercan/Thesis/pannzer2_results/parse_pannzer2.py` +- Raw HTML: `/home/frapercan/Thesis/pannzer2_results/raw/PANZ_{1-21}.html` +- Script de evaluación: `scripts/evaluate_external_tool.py --tool pannzer2` + +--- + +## Hallazgos previos + +- Baseline KNN con `score = 1 - distance/2` da buenos resultados en NK/LK +- Un intento previo de LightGBM per-aspecto (9 modelos) **empeoró** NK/LK: + - Causa 1: optimiza binary CE (todos los GO terms pesan igual) pero CAFA-eval pondera por IC + - Causa 2: features de agregación estaban NULL en el prediction set + +### Cambios de configuración + +- **2026-04-23 — Peso IEA en `DEFAULT_EVIDENCE_WEIGHTS` 0.3 → 0.8.** La jerarquía clásica de GO-docs coloca IEA por debajo del tier computacional (ISS/IBA/... 0.7) y de NAS (0.5). Observación empírica en el histórico de GOA: las anotaciones IEA se promueven a un código experimental con mayor frecuencia que las del tier computacional, por lo que su fiabilidad previa estaba infraestimada. Los tres stages del benchmark (`baseline`, `alignment_weighted`, `reranker` v4) no consumen `evidence_weight`, así que las Fmax reportadas en Exp 1–11 no cambian; el swap sólo afecta a scorings basados en evidencia (p. ej. `evidence_primary`, `composite`, `embedding_plus_evidence`). 
+ +--- + +## Exp 10 — ProstT5 vs ESMC (comparativa preliminar v3) + +**Fecha**: 2026-04-10 +**Objetivo**: replicar el reranker v3 sobre un segundo PLM (ProstT5-XL ~3B) para ver si la ganancia del v3 generaliza más allá de ESMC-300M. + +> **Caveat metodológico importante**: ESMC-300M (~300M params, BERT-like encoder) y ProstT5-XL (~3B params, T5 encoder + structure fine-tuning) son modelos con tamaño y arquitectura distintos. Esta comparativa mezcla esos ejes — no es fair para concluir nada sobre "ESMC vs ProstT5 como familia". El benchmark con matriz limpia está en `EXPERIMENTAL_DESIGN.md` (Exp 12). + +### Setup + +- **Evaluation set**: `42b34e79-6fe9-4fa0-b718-02f43a1e3192` (delta GOA 220→229, 20281 proteínas) +- **ESMC prediction set**: `a818b653-cad9-4f42-8e04-eda3f5ff2ceb` +- **ProstT5 prediction set**: `38ee00af-cbfd-4c5b-ab84-c98a32765b40` +- **IA file**: `IA_cafa6.tsv` +- **Ontology snapshot**: `947bdff6-d17c-4ca3-a41a-bc8fb4d74b7a` + +Rerankers v3 (`num_boost_round=1000, early_stopping_rounds=50, neg_pos_ratio=10, IA sample weights, 13 splits 160→220`): + +| Embedding | NK | LK | PK | +|---|---|---|---| +| ESMC-300M (job `16c3dcfd`) | `2ff1818f` | `269e26b4` | `e14b9716` | +| ProstT5-XL (job `12b704d4`) | `a1b4947d` | `60597ab9` | `1efd0c33` | + +CAFA eval results: +- ESMC + reranker: `ba7476cb-81f2-461a-b69a-a99c8df834bf` +- ProstT5 + reranker: `7b97e74a-54df-4e4e-90ed-39e07b58de64` + +### Resultados (cafaeval + IA, evaluación oficial) + +**F1 — ProstT5 gana en retrieval bruto**: avg Fmax baseline ProstT5 0.4849 vs ESMC 0.4824. Consistente en las 9 celdas: ProstT5 gana 44/45 en el 45-cell benchmark previo. 
+ +**F3 — Reranker per-aspect (9 celdas)**: + +| Método | NK-BPO | NK-MFO | NK-CCO | LK-BPO | LK-MFO | LK-CCO | PK-BPO | PK-MFO | PK-CCO | Avg | +|---|---|---|---|---|---|---|---|---|---|---| +| ESMC baseline | 0.412 | 0.590 | 0.668 | 0.467 | 0.558 | 0.675 | 0.187 | 0.278 | 0.325 | 0.4624 | +| ESMC + reranker v3 | 0.431 | 0.620 | 0.692 | 0.478 | 0.607 | 0.697 | 0.201 | 0.297 | 0.339 | **0.4846** | +| ProstT5 baseline | ~ | ~ | ~ | ~ | ~ | ~ | ~ | ~ | ~ | **0.4849** | +| ProstT5 + reranker v3 | ~ | ~ | ~ | ~ | ~ | ~ | ~ | ~ | ~ | 0.4817 | + +- **ESMC mejora con reranker**: 6/9 celdas, avg Δ = **+0.0022** +- **ProstT5 degrada con reranker**: 9/9 celdas, avg Δ = **−0.0032** +- Avg final ESMC+rr (0.4846) ≈ ProstT5+rr (0.4817), diferencia pequeña pero de signo opuesto a la del retrieval bruto + +### F2 — Feature importance (hipótesis de compensación) + +Extracción de `feature_importance` (gain) de los 6 rerankers. Agregado sobre features de `{alignment_*, similarity_*, taxonomic_*}`: + +- **ESMC ponderan alignment+taxonomy entre 2.15% y 5.22% más** que sus homólogos ProstT5 (monótono en NK/LK/PK) +- Diferencias dramáticas en features individuales: + - NK `alignment_score_nw`: ESMC 4.72% vs ProstT5 1.69% (**2.8×**) + - PK `similarity_nw`: ESMC 9.63% vs ProstT5 3.91% (**2.5×**) +- ProstT5 compensa redistribuyendo a features derivadas del embedding: `ref_annotation_density`, `vote_count`, `k_position` + +**Interpretación**: cuando el embedding es "más fuerte" (ProstT5), el reranker se apoya menos en señales externas (alineamiento, taxonomía) y más en estadísticos derivados del propio retrieval. Este es el carry-over de la hipótesis que se va a testear formalmente como H4 en `EXPERIMENTAL_DESIGN.md`. 
+ +### Blocker — under-training en los 6 modelos v3 + +Revisión del `best_iteration` de cada modelo con `num_boost_round=1000, early_stopping_rounds=50`: + +| Modelo | best_iteration | +|---|---| +| ESMC-nk | **1000** (techo, early stop no disparó) | +| ESMC-lk | 994 | +| ESMC-pk | 999 | +| ProstT5-nk | **1000** | +| ProstT5-lk | 995 | +| ProstT5-pk | **1000** | + +Con 95k–332k samples por tier y LR=0.01, este dataset típicamente necesita 3000–10000 iters para saturar. **Conclusión**: los deltas de F3 (especialmente el signo negativo de ProstT5 −0.0032) pueden ser artefacto del under-training, no efecto real del embedding. + +- **F2 (feature importance) sigue siendo válido** — ambos modelos tuvieron el mismo presupuesto bajo el techo, la diferencia *relativa* en cómo distribuyen alignment/taxonomy es una comparación justa +- **F3 (signos de los deltas Fmax) está contaminado** — no se debe usar para la tesis hasta que converjan + +**Lección metodológica crítica**: el campo `test_evaluation` que reporta `train_reranker_auto` muestra deltas de +0.04 a +0.08 Fmax mucho más optimistas que los +0.002 reales de cafaeval. El test_evaluation no aplica propagación GO ni IA weighting — **no usar para la tesis**. Solo cafaeval con IA. + +### Estado + +- F1 y F2: publicables con los números actuales +- F3: **pendiente de re-evaluación** tras v4 (ver Exp 11) +- Estado de trabajo detallado: `project_reranker_benchmark.md` (auto-memory) + +--- + +## Exp 11 — Re-training v4 "converged" (en curso) + +**Fecha de lanzamiento**: 2026-04-10 18:03 UTC +**Objetivo**: re-entrenar los 6 modelos (ESMC y ProstT5, NK/LK/PK) con presupuesto suficiente para que el early stopping dispare de verdad, eliminando el confounder de under-training del Exp 10. 
+ +### Cambios respecto a v3 + +| Parámetro | v3 | v4 | +|---|---|---| +| `num_boost_round` | 1000 | **5000** | +| `early_stopping_rounds` | 50 | **100** | +| Resto | — | idéntico (13 splits 160→220, neg_pos_ratio=10, IA weights, per-tier NK/LK/PK, alignment+taxonomy features) | + +El resto del pipeline (KNN, FAISS IVFFlat, feature engineering) es idéntico — v4 cambia **solo** el presupuesto de boosting. + +### Jobs + +Ambos lanzados a `protea.training` (cola aislada, worker dedicado, peak RAM ~14 GB con los fixes de chunked KNN del 2026-04-10): + +| Job | Modelo | Estado esperado | +|---|---|---| +| `48c91381-1af1-414c-bd1b-a6a51c931873` | `lgbm_v4_converged_esmc` | running (~2h) | +| `e923ac70-21a8-4c5c-8cc6-9ebb76d156aa` | `lgbm_v4_converged_prostt5` | queued, arrancará al terminar ESMC | + +Tiempo estimado total: ~4h serial (protea.training procesa uno a uno). + +### Escenarios esperados al terminar + +- **A — narrativa F2 se confirma**: ProstT5 sigue degradando (−ΔFmax tras converger) → conclusión fuerte de tesis, la hipótesis de compensación gana peso +- **B — ProstT5 pasa a neutro o +**: narrativa se suaviza ("ambos embeddings mejoran con reranker, ESMC un poco más") — F2 sigue válido como explicación +- **C — ambos suben ~0.01-0.02**: confirma que v3 estaba under-trained y da números definitivos más altos que Exp 10 + +### Pendientes cuando termine + +1. Verificar `best_iteration` de los 6 modelos nuevos (esperamos 2000-4000, disparando early stop) +2. Re-lanzar `run_cafa_evaluation` para ambos embeddings con los nuevos reranker UUIDs +3. Re-extraer feature importance y re-validar F2 +4. Reemplazar la tabla de F3 en el Exp 10 con los números de v4 +5. 
Decidir A/B/C y actualizar la narrativa de la tesis en consecuencia + +--- + +## Exp 12 — Extended PLM benchmark matrix (planned) + +**Fecha de diseño**: 2026-04-10 +**Estado**: documento de diseño prospectivo +**Plan completo**: `EXPERIMENTAL_DESIGN.md` + +### Motivación + +Exp 10 expuso el confounder central del trabajo preliminar: comparar ESMC-300M (~300M, BERT-like) con ProstT5-XL (~3B, T5 + structure fine-tuning) mezcla **tamaño** y **familia** en un solo eje. Ningún finding se puede atribuir a una u otra dimensión sin una matriz que los separe. + +### Matriz propuesta (8 modelos) + +| # | Modelo | Params | Backend | Estado | +|---|---|---|---|---| +| 1 | ESMC-300M | ~300M | `esm3c` | ✓ (Exp 10, v4 en curso) | +| 2 | ESMC-600M | ~600M | `esm3c` | nuevo | +| 3 | ESM2-650M (`esm2_t33_650M_UR50D`) | ~650M | `esm` | nuevo | +| 4 | ESM2-3B (`esm2_t36_3B_UR50D`) | ~3B | `esm` | nuevo | +| 5 | Ankh-base (`ElnaggarLab/ankh-base`) | ~450M | `ankh` | nuevo | +| 6 | Ankh-large (`ElnaggarLab/ankh-large`) | ~1.9B | `ankh` | nuevo | +| 7 | ProtT5-XL (`prot_t5_xl_uniref50`) | ~3B | `t5` | nuevo | +| 8 | ProstT5-XL | ~3B | `t5` | ✓ (Exp 10, v4 en curso) | + +**Descartado**: ESM2-15B (coste de embedding prohibitivo, no tiene par T5 de tamaño equivalente → rompe la simetría de la matriz). + +### Research questions (ver `EXPERIMENTAL_DESIGN.md` §2) + +- **RQ1**: ¿a tamaño fijo, qué familia gana (BERT-like vs T5 encoder)? +- **RQ2**: ¿cómo escala Fmax con el tamaño dentro de una familia? ¿Dónde satura? +- **RQ3**: ¿estructura aporta? — test pareado ProtT5-XL vs ProstT5-XL (mismo backbone, única diferencia = 3Di fine-tuning) +- **RQ4**: ¿los embeddings más débiles fuerzan al reranker a compensar con alignment+taxonomy? (carry-over de F2) + +### Protocolo + +Pipeline idéntico para los 8 modelos — cero tuning per-modelo. 
Ver `EXPERIMENTAL_DESIGN.md` §6 para hiperparámetros pinned: KNN `k=5`, FAISS IVFFlat, alignments + taxonomy on, reranker v4 (5000 rounds), `run_cafa_evaluation` con IA weighting. + +### Tests estadísticos + +Wilcoxon signed-rank sobre las 9 celdas Fmax, corrección Holm-Bonferroni sobre 6 comparaciones pareadas, bootstrap CI 95% para effect sizes. Regresión OLS para H4. + +### Coste + +~3-4 días de compute secuencial (embeddings + KNN + v4 training + eval por los 6 modelos nuevos). Comprimible con paralelismo GPU si procede. + +### Estado + +- **Diseño**: completo (`EXPERIMENTAL_DESIGN.md` v1.0) +- **Ejecución**: bloqueada hasta que v4 (Exp 11) valide que el presupuesto es correcto +- **Dependencias previas**: Ankh backend ya integrado en PROTEA como `model_backend="ankh"` dedicado (no alias de `t5`) — ver `project_ankh_backend.md` + +### Deliverables esperados + +1. Tabla master 8 × 3 (baseline / alignment_weighted / reranker) × 9 celdas +2. Heatmap de feature importance de las 24 rerankers (8 modelos × 3 tiers) +3. Report estadístico (p-valores + effect sizes + CIs) por comparación +4. Capítulo de tesis formalizando RQ1-RQ4 con la matriz como evidencia diff --git a/README.md b/README.md index 9cefcd8..f894fa5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ **Protein annotation platform** for large-scale GO term prediction, sequence embedding, and functional analysis. -PROTEA provides a unified backend for ingesting protein data from UniProt, computing ESM2 embeddings, and predicting Gene Ontology terms via KNN transfer — with a full job queue, REST API, and web interface. +PROTEA provides a unified backend for ingesting protein data from UniProt, computing protein language model embeddings (ESMC, ProstT5, ESM2), and predicting Gene Ontology terms via KNN transfer plus a learned LightGBM re-ranker — with a full job queue, REST API, and web interface. 
[![Lint](https://github.com/frapercan/PROTEA/actions/workflows/lint.yml/badge.svg)](https://github.com/frapercan/PROTEA/actions/workflows/lint.yml) [![Tests](https://github.com/frapercan/PROTEA/actions/workflows/test.yml/badge.svg)](https://github.com/frapercan/PROTEA/actions/workflows/test.yml) @@ -21,6 +21,16 @@ PROTEA provides a unified backend for ingesting protein data from UniProt, compu --- +## Why PROTEA? + +PROTEA is the successor to [PIS](https://github.com/CBBIO/protein-information-system) and [FANTASIA](https://github.com/CBBIO/fantasia), rebuilt around three goals: + +1. **Clean architecture** — infrastructure, orchestration, and domain logic are explicitly decoupled. Operations are pure domain logic; workers own sessions and queue state; routers expose HTTP. No more God-classes that mix everything. +2. **Learned re-ranking on top of KNN transfer** — beyond classical embedding-KNN annotation, PROTEA trains **LightGBM rerankers on temporal GOA splits** (LambdaRank + CAFA IA weighting, per-tier NK/LK/PK models). Candidates retrieved by KNN are re-scored with alignment, taxonomy, and retrieval features. +3. **Honest temporal evaluation** — benchmarking uses **temporal holdout deltas** between historical GOA releases (e.g. 220→229), evaluated with the official `cafaeval` library and information-accretion weighting, avoiding the optimistic leakage of random splits. 
+ +--- + ## What PROTEA does | Capability | Details | @@ -28,12 +38,13 @@ PROTEA provides a unified backend for ingesting protein data from UniProt, compu | **Protein ingestion** | Paginated UniProt REST API, MD5-deduplicated sequences | | **GO ontology** | Load OBO snapshots, full DAG stored per release | | **GO annotations** | Bulk import from GOA (GAF) and QuickGO (TSV) | -| **Embeddings** | ESM2 via GPU workers, stored as pgvector VECTOR columns | -| **GO prediction** | KNN transfer with optional NW/SW alignment and taxonomic features | -| **CAFA evaluation** | Benchmark pipeline with cafaeval integration | -| **Job queue** | RabbitMQ-backed, 7 queues, full audit trail per job | -| **REST API** | 21 FastAPI endpoints across 5 routers | -| **Web UI** | Next.js frontend with protein explorer, annotation viewer, prediction browser | +| **Embeddings** | ESMC, ProstT5, and ESM2 backends via GPU workers; stored as pgvector `VECTOR` columns | +| **GO prediction** | KNN transfer (FAISS IVFFlat / numpy) with optional NW/SW alignment and taxonomic features | +| **Learning-to-rank** | LightGBM rerankers trained on temporal GOA splits — LambdaRank + IA weighting, per-tier NK/LK/PK models | +| **CAFA evaluation** | Benchmark pipeline with `cafaeval` integration, Fmax + IA-weighted scoring, per-aspect (BPO/MFO/CCO) results | +| **Job queue** | RabbitMQ-backed, 8 queues (ingestion, embeddings, predictions, training), full audit trail per job | +| **REST API** | FastAPI routers for jobs, proteins, embeddings, query sets, scoring, evaluation, and admin | +| **Web UI** | Next.js frontend with protein explorer, annotation viewer, prediction browser, and live job widget | --- @@ -73,6 +84,52 @@ bash scripts/manage.sh start --- +## 5 minutes to your first job + +With the stack running locally, you can submit a job and watch it +move through the queue + worker + DB lifecycle in under 5 minutes. + +```bash +# 1. Submit a `ping` job (the smoke-test operation). 
+JOB_ID=$(curl -s -X POST http://localhost:8000/jobs \ + -H 'content-type: application/json' \ + -d '{"operation": "ping", "queue_name": "protea.ping", "payload": {}}' \ + | jq -r '.id') +echo "queued: $JOB_ID" + +# 2. Tail the structured-event log until the job reaches a terminal state. +curl -s "http://localhost:8000/jobs/$JOB_ID/events" | jq -c '.[]' +# {"event":"ping.start","fields":null,"level":"info","ts":"..."} +# {"event":"ping.done","fields":{"latency_ms":1.2},"level":"info","ts":"..."} + +# 3. Check the final job row + result. +curl -s "http://localhost:8000/jobs/$JOB_ID" | jq '{status, result, error_code}' +# {"status":"succeeded","result":{"echo":"pong"},"error_code":null} +``` + +That round-trip exercises the full machinery: HTTP enqueue → AMQP +publish → worker claim → operation execute → JobEvent stream → DB +commit → REST query. Real operations (`insert_proteins`, +`load_goa_annotations`, `compute_embeddings`, `predict_go_terms`) +are submitted the same way; their payloads are documented at +`/docs` (Swagger UI) and in the operation-catalog page of the +Sphinx docs. 
+ +Discovering the installed plugins (added in F2B turn 36): + +```bash +curl -s http://localhost:8000/backends | jq '.plugins[].name' +# "ankh", "esm", "esm3c", "t5" + +curl -s http://localhost:8000/sources | jq '.plugins[].name' +# "goa", "quickgo", "uniprot" + +curl -s http://localhost:8000/runners | jq '.plugins[].name' +# "baseline", "knn", "lightgbm" +``` + +--- + ## Documentation Full documentation at **https://protea.readthedocs.io** @@ -103,10 +160,17 @@ poetry run task lint # ruff + flake8 + mypy |---|---| | API | FastAPI + SQLAlchemy 2.x + PostgreSQL 16 + pgvector | | Queue | RabbitMQ (pika) | -| Embeddings | ESM2 (Meta) via Hugging Face Transformers | -| KNN search | FAISS IVFFlat / numpy | +| Embeddings | ESMC (ESM SDK), ProstT5 / prot_t5_xl (T5Encoder), ESM2 (Hugging Face Transformers) | +| KNN search | FAISS IVFFlat / numpy (chunked brute-force) | +| Re-ranker | LightGBM (LambdaRank, IA-weighted samples) | | Frontend | Next.js 19 + Tailwind v4 | -| Deployment | Docker, manage.sh, vast.ai GPU instances | +| Deployment | Docker Compose, `scripts/manage.sh` process supervisor | + +--- + +## License + +Released into the public domain under the [Unlicense](LICENSE). You are free to copy, modify, publish, use, compile, sell, or distribute PROTEA for any purpose, commercial or non-commercial, without attribution. --- diff --git a/RERANKER.md b/RERANKER.md index 2301546..89a0711 100644 --- a/RERANKER.md +++ b/RERANKER.md @@ -1,188 +1,237 @@ -# Temporal Holdout Re-Ranker for GO Term Prediction +# PROTEA Re-Ranker — Design and Rationale -## Motivación +**Status**: implemented (v3 shipped, v4 training in progress) +**Location in code**: `protea/core/reranker.py`, `protea/core/operations/train_reranker.py` +**Version**: 2.0 — 2026-04-10 (rewrite) -El pipeline actual de PROTEA transfiere anotaciones GO mediante KNN sobre embeddings ESM, usando un scoring heurístico que combina distancia de embedding y pesos de evidencia. 
Este scoring no está optimizado para la métrica objetivo (Fmax) ni para el comportamiento real de las anotaciones GO a lo largo del tiempo. - -La hipótesis central es que existe una señal aprendible: **dado el contexto de una predicción KNN, ¿acabará este GO term apareciendo en el siguiente release de GOA para esta proteína?** Esta señal puede extraerse directamente del mecanismo de holdout temporal que ya implementa PROTEA. +> This document describes **the re-ranker as it exists in PROTEA today**. An earlier version of this file proposed a PyTorch cross-attention architecture with WebDataset shards; that proposal was explored on paper but **never implemented**. The system converged on a simpler LightGBM design for the reasons documented in §3 ("Why LightGBM and not a neural cross-encoder"). The experiment log showing the evolution across versions lives in `EXPERIMENTS.md`; the forward-looking PLM benchmark plan that uses this re-ranker as a fixed downstream stage lives in `EXPERIMENTAL_DESIGN.md`. --- -## Formulación del Problema +## 1. Problem statement -Sea $\mathcal{G}_N$ el conjunto de anotaciones GO en el release $N$ de GOA (Swiss-Prot reviewed). Para cada par consecutivo $(G_N, G_{N+1})$, el delta temporal es: +PROTEA predicts GO terms by transferring annotations from the $k$ nearest reference proteins in an embedding space. The raw retrieval score is a distance-based heuristic (e.g. `1 - cosine_distance / 2`) optionally combined with alignment identity and evidence weights. 
This heuristic is: -$$\Delta_{N \to N+1} = \{(p, t) \mid (p, t) \in \mathcal{G}_{N+1} \setminus \mathcal{G}_N\}$$ +- **Not optimised for Fmax with IA weighting** — the metric CAFA actually uses +- **Not calibrated across tiers** — No-Knowledge, Limited-Knowledge and Previously-Known proteins behave very differently and benefit from different signal combinations +- **Not able to use all available features** — sequence alignments, taxonomy, neighbour statistics, and evidence codes are either ignored or combined by hand with arbitrary weights -El re-ranker aprende una función: +The re-ranker replaces this heuristic with a **learned function** that, for each candidate GO term, produces a probability score used to reorder the top-$k$ retrieval list: $$f(q, t, \mathcal{N}_K(q)) \to \hat{y} \in [0, 1]$$ -donde: -- $q$ es la proteína query (representada por su embedding ESM) -- $t$ es el GO term candidato -- $\mathcal{N}_K(q)$ es el conjunto de $K$ vecinos más cercanos en el espacio de embeddings con referencia $\mathcal{G}_N$ -- $\hat{y}$ es la probabilidad de que $(q, t) \in \Delta_{N \to N+1}$ +where $q$ is the query protein, $t$ is a candidate GO term, and $\mathcal{N}_K(q)$ is the set of $K$ nearest neighbours that voted for $t$. + +The training signal is derived from the **temporal structure of GOA releases**: a GO term that first appears for a protein in a later release (and was missing from an earlier one) defines a positive example; any term predicted but absent from the future release is a negative. See §4. --- -## Protocolo de Entrenamiento +## 2. 
Scope of this document -Se utiliza validación cruzada temporal con múltiples splits históricos de GOA: +| Covered | Not covered | +|---|---| +| Model architecture and feature set | Downstream CAFA evaluation protocol (→ `EXPERIMENTAL_DESIGN.md` §7) | +| Training protocol and hyperparameters | PLM comparison across ESMC/ESM2/ProstT5/Ankh (→ `EXPERIMENTAL_DESIGN.md`) | +| Version history and key design decisions | Historical result tables per experiment (→ `EXPERIMENTS.md`) | +| Integration with the PROTEA pipeline | Alternative rankers (cross-attention, ListNet, ProT5 rerankers…) | +| Known limitations | External tool baselines (eggNOG, Pannzer2, InterProScan) | -``` -Training splits: - GOA_190 → GOA_195 - GOA_195 → GOA_200 - GOA_200 → GOA_205 - GOA_205 → GOA_211 - GOA_211 → GOA_215 - GOA_215 → GOA_220 - -Test split (holdout estricto, nunca visto durante training): - GOA_220 → GOA_229 -``` +--- + +## 3. Why LightGBM and not a neural cross-encoder + +The original design (see §11 for the earlier version's record) proposed a cross-attention neural re-ranker in PyTorch, with learned GO term embeddings from the GO DAG and a WebDataset sharded data pipeline. That proposal was abandoned in favour of a LightGBM gradient-boosted tree model for four concrete reasons: -Para cada split se generan ejemplos etiquetados: positivos $(y=1)$ si el par (proteína, GO term) aparece en el delta, negativos $(y=0)$ en caso contrario. El desbalanceo esperado es aproximadamente 1:10, manejable con técnicas estándar. +1. **Data volume is moderate, not huge.** Each temporal split yields 80k–330k training rows after negative subsampling. Gradient boosted trees are the sample-efficient sweet spot for this regime; a cross-attention transformer would either overfit or need heavy regularisation and we would then be tuning architecture choices instead of studying the actual research question. +2. 
**Feature heterogeneity is the bottleneck, not representation.** The informative features are already engineered (alignment scores, taxonomy distance, neighbour statistics). A model whose job is to combine 23 tabular features non-linearly across categorical and numeric axes is exactly what GBDT excels at. A neural cross-encoder would need to learn an equivalent combination from scratch. +3. **Interpretability is a thesis requirement.** The F2 finding (that smaller PLMs force the re-ranker to rely more on alignment/taxonomy) can only be measured through gain-based feature importance. LightGBM exposes this directly; extracting equivalent attributions from a cross-attention model requires additional machinery (integrated gradients, attention rollout) that adds failure modes. +4. **Training cost was a hard constraint.** Each re-ranker (per-tier × per-embedding) trains in 2–4 hours on CPU. The same pipeline under a neural cross-encoder with the same budget would train a single model for similar time on a GPU while blocking the embedding worker. Since the PLM benchmark (`EXPERIMENTAL_DESIGN.md`) multiplies compute cost by 8, the LightGBM choice is what makes the study feasible on a single workstation. + +The cross-attention design was not a wrong idea, only a wrong fit for this problem at this scale. Revisiting it remains an option if a later phase of the work finds a measurable ceiling on LightGBM. --- -## Arquitectura: Cross-Attention Re-Ranker +## 4. Temporal holdout training signal -El modelo procesa cada par (query, GO term) usando el contexto completo de los vecinos KNN que contribuyeron a esa predicción. +Let $\mathcal{G}_N$ denote the set of GO annotations present in GOA release $N$ (Swiss-Prot reviewed, evidence-filtered to exclude IEA if so configured). 
For any ordered pair of releases $(N, N+1)$, the **annotation delta** is -``` -Inputs por predicción (query_protein, go_term): - query_embedding float32[D] ESM embedding del query (D=480 para esmc_300m) - neighbor_embeddings float32[K × D] ESM embeddings de los K vecinos contribuyentes - tabular_features float32[K × F] distancia, evidencia, alineamiento, taxonomía... - go_term_embedding float32[G] embedding semántico del GO term (G=64) - -Arquitectura: - 1. query_proj(query_embedding) → q [H=256] - 2. ref_proj(neighbor_embeddings) → tokens [K × H] - 3. feature_encoder(tabular_features) → (sumado a tokens) - 4. CrossAttention(q, tokens, tokens) → context [H] - 5. MLP([q ‖ context ‖ go_emb ‖ agg_features]) → score [1] -``` +$$\Delta_{N \to N+1} = \{(p, t) \mid (p, t) \in \mathcal{G}_{N+1} \setminus \mathcal{G}_N\}$$ + +For a training pair $(N, N+1)$: -La atención cruzada permite al modelo aprender **qué vecinos son más informativos para este query concreto**, en lugar de agregar los scores de forma heurística. +1. All proteins in $\mathcal{G}_{N+1}$ are used as queries. +2. KNN retrieval is performed using **only** the reference set derived from $\mathcal{G}_N$ (no leakage from the future). +3. For each candidate $(q, t)$ in the retrieval output: + - **Positive** ($y = 1$) if $(q, t) \in \Delta_{N \to N+1}$ (the annotation materialised between $N$ and $N+1$) + - **Negative** ($y = 0$) if the model predicted $t$ but $(q, t) \notin \mathcal{G}_{N+1}$ -### GO Term Embeddings +This definition ensures the training labels are **causally prior** to the prediction: at time $N$ the system does not know what $N+1$ will contain, and neither does the re-ranker while scoring. -Los embeddings de los GO terms se aprenden a partir de la estructura del DAG de GO (relaciones `is_a` / `part_of`) mediante Node2Vec o TransE, de forma que términos semánticamente relacionados (padre-hijo) tengan representaciones similares. 
El DAG ya está disponible en PROTEA a través de los modelos `GOTerm` y `GOTermRelationship`. +The test split $(220 \to 229)$ is never seen during training and produces the Fmax numbers that are reported for the thesis. --- -## Feature Vector +## 5. Feature set (implementation: `protea/core/reranker.py`) -Cada predicción (query, GO term) se caracteriza por las siguientes features tabulares, computadas por vecino que contribuyó a la predicción: +Each (query, candidate GO term, contributing neighbour) triple is characterised by **23 features** — 20 numeric and 3 categorical — computed at KNN time and persisted on `GOPrediction` rows. -| Feature | Descripción | Estado | +### 5.1 Numeric features (20) + +| Group | Feature | Origin | |---|---|---| -| `distance` | Distancia coseno en espacio de embeddings | Existente | -| `evidence_weight` | Peso del código de evidencia (IDA > IEA) | Existente | -| `identity_nw / sw` | Identidad de secuencia (alineamiento NW/SW) | Existente (opcional) | -| `similarity_nw / sw` | Similaridad de secuencia | Existente (opcional) | -| `taxonomic_distance` | Distancia taxonómica entre query y referencia | Existente (opcional) | -| `vote_count` | Número de vecinos que coinciden en este GO term | **Nuevo** | -| `k_position` | Posición del vecino más cercano que predijo este término | **Nuevo** | -| `go_term_frequency` | Frecuencia del término en el annotation set de referencia | **Nuevo** | -| `ref_annotation_density` | Número de GO terms de la proteína de referencia | **Nuevo** | -| `neighbor_distance_std` | Varianza de distancias a los K vecinos | **Nuevo** | +| **Embedding retrieval** | `distance` | cosine distance between query and the contributing neighbour | +| **NW alignment** | `identity_nw`, `similarity_nw`, `alignment_score_nw`, `gaps_pct_nw`, `alignment_length_nw` | Needleman–Wunsch via parasail (BLOSUM62), computed per (query, neighbour) pair when `compute_alignments=True` | +| **SW alignment** | `identity_sw`, `similarity_sw`, 
`alignment_score_sw`, `gaps_pct_sw`, `alignment_length_sw` | Smith–Waterman via parasail (BLOSUM62), same condition | +| **Sequence length** | `length_query`, `length_ref` | Raw sequence lengths | +| **Taxonomy** | `taxonomic_distance`, `taxonomic_common_ancestors` | NCBI taxonomy LCA via ete3 when `compute_taxonomy=True` | +| **Neighbour aggregation** | `vote_count` | Number of neighbours in the top-$k$ that voted for the same GO term | +| | `k_position` | Rank (0-indexed) of the closest neighbour that supported the term | +| | `go_term_frequency` | Global frequency of the term in the reference annotation set | +| | `ref_annotation_density` | Number of distinct GO terms annotating the reference protein | +| | `neighbor_distance_std` | Standard deviation of distances across the $k$ neighbours of the query | + +### 5.2 Categorical features (3) + +| Feature | Meaning | +|---|---| +| `qualifier` | GAF qualifier of the source annotation (`enables`, `involved_in`, etc.) | +| `evidence_code` | GAF evidence code of the source annotation (`EXP`, `IDA`, `IEA`, …) | +| `taxonomic_relation` | Discrete label derived from the LCA (`same_species`, `same_genus`, `same_family`, `distant`) | + +Categoricals are passed to LightGBM via its native `categorical_feature` handling (no one-hot encoding; LightGBM partitions on category sets directly). + +### 5.3 Missing-value convention + +- Numeric missing values are left as `NaN` and handled natively by LightGBM's missing-value-aware splits. +- Categorical missing values are coerced to `NA` and treated as a distinct bin. +- Alignment and taxonomy columns are only populated when `compute_alignments=True` / `compute_taxonomy=True` at prediction time. If either flag is off, those columns are all-NaN for the run and the re-ranker still trains but with a degraded feature set. --- -## Función de Pérdida +## 6. 
Model and training protocol + +### 6.1 Model + +- **Library**: LightGBM (`lightgbm.Booster`) +- **Objective**: `binary` (binary cross-entropy / log loss) +- **Validation metric**: `binary_logloss` and `auc` (early stopping is tracked on AUC) +- **Boosting**: `gbdt` with `num_leaves=31`, `learning_rate=0.01`, `feature_fraction=0.8`, `bagging_fraction=0.8`, `bagging_freq=5`, `seed=42` +- **Early stopping**: disabled via callback only if `early_stopping_rounds=0`; otherwise stops when validation AUC does not improve for the configured number of rounds -Se utiliza **LambdaRank** en lugar de binary cross-entropy, ya que optimiza directamente el orden de las predicciones (proxy de NDCG / Fmax) en lugar de la calibración de probabilidades. +> **Note on the objective.** Earlier drafts of this document (and informal notes) described the loss as **LambdaRank**. The implementation is actually **binary cross-entropy**. Switching to a pairwise/listwise rank loss is a known avenue for future work; it was deferred because (a) binary CE is the simpler baseline and has already matched or beaten the heuristic `alignment_weighted` scoring and (b) LambdaRank would require restructuring the training data into query groups, which complicates the per-split sampling pipeline. -Para cada proteína query, las predicciones GO se rankean conjuntamente: -- Positivos: GO terms en $\Delta_{N \to N+1}$ -- Negativos: GO terms predichos pero no en el delta +### 6.2 Split strategy + +- **Stratified train/val split** at `val_fraction=0.2`, stratified on the label (the positive rate is 0.17%–5% depending on tier × aspect — naive random splits would under-represent positives in the validation set). +- **Negative subsampling** via `neg_pos_ratio=10`: after splitting, each of the train and val sets is independently subsampled so that `|negatives| ≤ 10 × |positives|`. 
Without this step, 6 of 9 per-(tier, aspect) models in v1 failed to learn at all — the positive rate was too low for gradient boosted trees to see a signal. +- **IA sample weighting**: when an information accretion file is provided, each row's `sample_weight` is set to `IA(go_term)`. This makes the model focus on informative (rare, specific) GO terms — the same aspect of the term that CAFA evaluation rewards via IA-weighted Fmax. + +### 6.3 Per-tier, not per-aspect + +One model is trained **per tier** (`NK`, `LK`, `PK`), not per (tier × aspect). This was an explicit change in v2 after v1 trained 9 models (one per cell) and 6 of them either never converged or overfit on the smaller aspect slices. Aspect identity is not currently used as a feature; this is a known simplification (see §9). + +### 6.4 Temporal splits + +- **Training pairs**: 12 consecutive deltas from GOA 160 through GOA 220 — `[(160,165), (165,170), (170,175), (175,180), (180,185), (185,190), (190,195), (195,200), (200,205), (205,211), (211,215), (215,220)]`. The training rows from all pairs are concatenated and passed to LightGBM as a single dataset. Pair identity is not used as a feature. +- **Test pair**: `(220, 229)` — never seen during training. The test set is passed through the trained reranker and fed to `run_cafa_evaluation` alongside the baseline to measure the lift. + +### 6.5 Budget + +| Version | `num_boost_round` | `early_stopping_rounds` | Comment | +|---|---|---|---| +| v1 | 300 | 50 | 6/9 models hit iter=1 (early stop on first round) — under-trained, unbalanced | +| v2 | 1000 | 50 | Stable; per-tier models; IA weighting introduced | +| v3 | 1000 | 50 | Same budget; alignment + taxonomy features fully populated in training (were NULL in v2) | +| v4 | **5000** | **100** | In progress 2026-04-10: all 6 v3 models hit `best_iteration ≈ 1000` — implying they never converged under the previous budget. v4 restores early stopping as a convergence criterion, not a time-out. 
| --- -## Pipeline de Datos: WebDataset +## 7. Integration with the PROTEA pipeline -El volumen de datos (múltiples splits × ~1.35M predicciones por split × embeddings de 480 dim) requiere un pipeline de datos eficiente. Se propone almacenar los ejemplos de entrenamiento en formato **WebDataset** (shards tar), con un shard por split GOA: +### 7.1 ORM and persistence -``` -reranker_data/ - splits/ - goa190_to_195.tar # ~2GB por shard - goa195_to_200.tar - ... - goa220_to_229.tar # test split — no tocar durante training - models/ - reranker_v1.pt - reranker_v1_config.json -``` +- **`Reranker` row** (table: `rerankers`) — stores the trained LightGBM booster serialised as bytes alongside training metadata (`feature_importance`, `val_auc`, `best_iteration`, `train_samples`, hyperparameters, parent `job_id`). +- **`RerankerTrainingJob`** row captures the auto-pipeline metadata (splits used, features computed, per-tier model IDs). + +### 7.2 Scoring router + +The `scoring` router exposes endpoints to list and inspect rerankers: +- `GET /scoring/rerankers` — list trained rerankers +- `GET /scoring/rerankers/{id}` — metadata + feature importance -Cada muestra en el WebDataset es **una proteína query** con todas sus predicciones GO para ese split: +### 7.3 Applying the re-ranker at evaluation time -```python +At evaluation time (`run_cafa_evaluation`), the caller supplies a `rerankers` mapping that selects a re-ranker per tier: + +```json { - "query_accession": "P12345", - "query_embedding": float32[480], - "go_term_ids": ["GO:0006915", "GO:0005737", ...], # N_preds - "neighbor_embeddings": float32[N_preds, K, 480], - "tabular_features": float32[N_preds, K, F], - "labels": int8[N_preds], # 1 si en delta, 0 si no + "rerankers": { + "nk": {"reranker_id": "2ff1818f-71b6-4932-8f8d-b3000e3c8d34"}, + "lk": {"reranker_id": "269e26b4-0bec-42fa-a077-fe5b675dd2de"}, + "pk": {"reranker_id": "e14b9716-bbf8-4b99-b34b-b801c3966579"} + } } ``` -El streaming de WebDataset permite entrenar 
sin cargar todo en RAM. +The evaluation operation: +1. Streams predictions from the target `PredictionSet` tier by tier. +2. For each tier, loads the corresponding booster, applies it to the feature matrix, and overrides the original `score` with the re-ranked probability. +3. Feeds the re-ranked predictions to `cafaeval` with IA weighting and emits per-cell Fmax. ---- +The raw `PredictionSet` is never mutated — the re-ranker only changes the `score` column as the rows pass through evaluation. This means a single prediction set can be evaluated under multiple re-rankers (ESMC, ProstT5, v3, v4, …) without duplicating storage. -## Stack Tecnológico +### 7.4 `train_reranker_auto` operation -| Componente | Tecnología | -|---|---| -| Modelo | PyTorch | -| Data pipeline | WebDataset + torch.utils.data | -| Baseline comparación | LightGBM (binary + LambdaRank) | -| GO embeddings | Node2Vec / PyTorch Geometric | -| Seguimiento experimentos | wandb | -| Embeddings proteína | ESM2 / ESMC (ya en PROTEA) | +The operation `train_reranker_auto` orchestrates the full pipeline end-to-end: ---- +1. For each training pair, runs KNN retrieval (FAISS IVFFlat by default) with `compute_alignments=True`, `compute_taxonomy=True`. +2. Writes per-pair parquet files into a temporary directory. +3. Loads the concatenation into memory, applies per-tier splits, trains three LightGBM boosters. +4. Persists the three boosters as `Reranker` rows under a common base name. +5. Optionally runs a self-evaluation on the held-out test split (see warning in §8). +6. **Cleans up the temporary parquet files** on exit (`shutil.rmtree(tmp_dir)` at `train_reranker.py:1480`). + +The cleanup in step 6 has an important consequence: **re-training only the LightGBM stage is not possible** after a pipeline run — a re-train requires re-executing the full KNN + feature engineering path. This is why each v-version re-train takes hours, not minutes. 
-## Integración en PROTEA +--- -Una vez entrenado, el re-ranker se integra en el pipeline existente: +## 8. Known limitations and caveats -1. Nuevo modelo ORM `RerankingModel`: almacena pesos serializados y metadata de entrenamiento -2. Campo `reranker_id` (nullable) en `PredictionSet` -3. Si `reranker_id` presente: `store_predictions` aplica el modelo y sobreescribe `score` con $\hat{y}$ -4. El threshold de Fmax se calcula igual que ahora sobre los nuevos scores -5. UI: selector de re-ranker en la pantalla de predicción +1. **`test_evaluation` is not comparable to `cafaeval`.** The operation optionally runs an internal test evaluator against the held-out split. That evaluator does not apply GO propagation, does not apply IA weighting, and uses a naive macro-Fmax that inflates improvements by +0.04 to +0.08 over what `cafaeval` actually reports. **It must not be used in thesis claims.** Only `run_cafa_evaluation` with IA and GO propagation produces numbers that belong in the thesis. +2. **Binary objective is a proxy for ranking.** Binary cross-entropy optimises pointwise calibration, not ranking quality. This is the single largest known gap between the current implementation and the ideal model for Fmax. Replacing it with LambdaRank (or a listwise objective) is the first item on the "future work" list. +3. **Parquet staging files are ephemeral.** The KNN + feature engineering output is thrown away at the end of a training run, so the LightGBM stage cannot be iterated independently. Persisting the staging parquet (behind a flag) would allow rapid hyperparameter sweeps. Open question: is the additional disk cost (10–20 GB per run) worth it? +4. **No aspect feature.** Aspect is not used as a feature, even though BPO/MFO/CCO have very different annotation densities and the same term can behave differently across aspects. A per-tier model averages across aspects and may under-perform in MFO vs BPO. +5. **No uncertainty output.** The re-ranker emits a point probability. 
Downstream evaluation is sensitive to calibration, but calibration is not currently measured. A reliability diagram per tier would help diagnose whether the probabilities are meaningful or only usable for ranking. +6. **Under-training of v1–v3.** All six v3 models (ESMC and ProstT5, NK/LK/PK) hit `best_iteration ≈ 1000` at the previous budget, which indicates the models never satisfied the early stopping criterion. The Fmax deltas derived from v3 must be treated as provisional until v4 completes. See `project_reranker_benchmark.md` for the full story. +7. **Temporal label noise.** Some annotations in $\Delta_{N \to N+1}$ are not genuinely "new biology"; they are curation catch-ups. There is no filter for this, so the training label includes noise. Evidence code filtering removes the worst offenders (IEA) but not all. +8. **Single embedding at a time.** The re-ranker is trained on features derived from one embedding configuration. There is no multi-embedding ensemble; comparing ESMC, ProstT5 and Ankh means training three independent re-rankers — which is exactly what the benchmark in `EXPERIMENTAL_DESIGN.md` does. --- -## Experimentos y Ablaciones +## 9. 
Version history -El diseño permite comparar directamente: +| Version | Date | Change | Outcome | +|---|---|---|---| +| v1 (unbalanced) | 2026-03-22 | First working pipeline: 9 per-(tier, aspect) models, binary CE, 300 rounds, no sample weights, no negative subsampling | 6/9 models never learned (positive rate too low); CCO/MFO noisy | +| v1 (balanced) | 2026-03-22 | Added `neg_pos_ratio=10`; same 9 models | All models learned; BPO recovered; MFO degraded vs heuristic | +| v2 | 2026-03-23 | Collapsed to 3 per-tier models (NK/LK/PK); added IA sample weighting; raised `num_boost_round` to 1000 | Robust; matched the heuristic `alignment_weighted` in most cells but did not beat it | +| v3 | 2026-03-23 | Populated alignment + taxonomy features during training (were NULL in v2) | First version to beat `alignment_weighted` in 7/9 cells for ESMC-300M | +| v3 ProstT5 | 2026-04-10 | Same v3 protocol, run on ProstT5-XL embeddings for cross-embedding comparison | Yielded the F1/F2/F3 findings in `project_reranker_benchmark.md`; exposed the under-training in v3 | +| v4 (in progress) | 2026-04-10 | Raised `num_boost_round` to 5000 and `early_stopping_rounds` to 100; same features, same splits | In training for both ESMC-300M and ProstT5-XL (jobs `48c91381`, `e923ac70`); meant to provide the converged reference numbers | -| Configuración | Descripción | -|---|---| -| **Baseline** | KNN + scoring heurístico actual | -| **LightGBM tabular** | Re-ranker con features tabulares sin embeddings | -| **LightGBM + derived** | Features tabulares + features derivadas del embedding (density, std) | -| **MLP cross-encoder** | Arquitectura completa sin cross-attention | -| **Cross-attention (propuesto)** | Arquitectura completa | -| **+ GO DAG embeddings** | Ablación: ¿aportan los go_term_emb? | -| **+ temporal CV** | Ablación: ¿mejora añadir más splits históricos? 
| +Concrete reranker UUIDs for the v3 and v4 runs live in `project_reranker_benchmark.md` and will be mirrored into `EXPERIMENTS.md` once v4 completes. + +--- + +## 10. Forward pointers -La métrica principal es **Fmax promedio sobre los 9 settings** (NK/LK/PK × BPO/MFO/CCO) en el test split GOA220→229. +- **`EXPERIMENTS.md`** — per-experiment tables, external tool comparisons, day-to-day lab notebook. +- **`EXPERIMENTAL_DESIGN.md`** — the prospective 8-model PLM comparison that uses this re-ranker as a fixed downstream stage. +- **`project_reranker_benchmark.md`** (in auto-memory) — volatile working state for the ongoing benchmark. +- **Code**: `protea/core/reranker.py` (feature definitions, `train`, `predict_scores`), `protea/core/operations/train_reranker.py` (both `TrainRerankerPayload` and `TrainRerankerAutoPayload`, the full pipeline). --- -## Valor para la Tesis +## 11. Historical note: why this file was rewritten -1. **Científicamente honesto**: el mismo mecanismo temporal que se usa para evaluar se usa para entrenar. No hay data leakage. -2. **Comprobable y cuantificable**: Fmax(baseline KNN) vs Fmax(re-ranker) en benchmark idéntico. -3. **Interpretable**: las feature importances (LightGBM) o los pesos de atención (cross-attention) revelan qué aspectos de una predicción KNN son más predictivos de anotaciones futuras. -4. **Generalizable**: el re-ranker aprende sobre distribuciones temporales de anotaciones GO, no sobre una proteína concreta — debería generalizar a proteínas no vistas. -5. **Extensible**: la arquitectura admite incorporar embeddings de secuencia de mayor calidad (ESM3, ProstT5) sin cambiar el pipeline. +The previous version of `RERANKER.md` (removed 2026-04-10) proposed a PyTorch cross-attention re-ranker over ESM embeddings with WebDataset sharded I/O, Node2Vec GO term embeddings, wandb tracking, and a nine-cell (tier × aspect) ablation matrix. That design was never built. 
The system that actually exists and produces the benchmark numbers in `EXPERIMENTS.md` is the LightGBM pipeline documented above. Keeping the two in sync was causing confusion when referring back to the design doc during thesis writing, so the document was rewritten from the current source of truth (`protea/core/reranker.py`) rather than from the original proposal. The historical proposal is preserved in git history for reference. diff --git a/alembic/versions/651358a5a2c8_add_consensus_features_to_go_prediction.py b/alembic/versions/651358a5a2c8_add_consensus_features_to_go_prediction.py new file mode 100644 index 0000000..61820ad --- /dev/null +++ b/alembic/versions/651358a5a2c8_add_consensus_features_to_go_prediction.py @@ -0,0 +1,37 @@ +"""add consensus features to go_prediction + +Revision ID: 651358a5a2c8 +Revises: b1a1f4ec0e42 +Create Date: 2026-04-16 10:00:00.000000 +""" +from __future__ import annotations + +import sqlalchemy as sa + +from alembic import op + +revision: str = "651358a5a2c8" +down_revision: str = "b1a1f4ec0e42" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "go_prediction", + sa.Column("neighbor_vote_fraction", sa.Float(), nullable=True), + ) + op.add_column( + "go_prediction", + sa.Column("neighbor_min_distance", sa.Float(), nullable=True), + ) + op.add_column( + "go_prediction", + sa.Column("neighbor_mean_distance", sa.Float(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("go_prediction", "neighbor_mean_distance") + op.drop_column("go_prediction", "neighbor_min_distance") + op.drop_column("go_prediction", "neighbor_vote_fraction") diff --git a/alembic/versions/76cafcb8d9be_add_groundtruth_uri_to_evaluation_set.py b/alembic/versions/76cafcb8d9be_add_groundtruth_uri_to_evaluation_set.py new file mode 100644 index 0000000..d75d9ff --- /dev/null +++ b/alembic/versions/76cafcb8d9be_add_groundtruth_uri_to_evaluation_set.py @@ -0,0 +1,32 @@ +"""add groundtruth_uri to evaluation_set + 
+Revision ID: 76cafcb8d9be +Revises: e037f3ae9f58 +Create Date: 2026-04-22 01:50:29.469554 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '76cafcb8d9be' +down_revision: Union[str, Sequence[str], None] = 'e037f3ae9f58' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('evaluation_set', sa.Column('groundtruth_uri', sa.String(length=512), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('evaluation_set', 'groundtruth_uri') + # ### end Alembic commands ### diff --git a/alembic/versions/7a2c9e1d0b33_add_reranker_v6_features_to_go_prediction.py b/alembic/versions/7a2c9e1d0b33_add_reranker_v6_features_to_go_prediction.py new file mode 100644 index 0000000..c19a3b6 --- /dev/null +++ b/alembic/versions/7a2c9e1d0b33_add_reranker_v6_features_to_go_prediction.py @@ -0,0 +1,55 @@ +"""add reranker v6 features to go_prediction + +Adds 25 nullable Float columns used by the v6 reranker: + +- 6 Anc2Vec semantic-coherence features (neighbor + query-known). +- 3 tax_voters consensus features (computed over the subset of neighbors that + voted for each candidate term). +- 16 emb_pca_query_* features (per-query projection onto the precomputed + principal components of the reference embedding pool). + +All columns are nullable because older prediction_sets predate these features +and older reranker versions do not read them. 
+ +Revision ID: 7a2c9e1d0b33 +Revises: 651358a5a2c8 +Create Date: 2026-04-19 12:00:00.000000 +""" +from __future__ import annotations + +import sqlalchemy as sa + +from alembic import op + +revision: str = "7a2c9e1d0b33" +down_revision: str = "651358a5a2c8" +branch_labels = None +depends_on = None + + +_ANC2VEC_COLS = ( + "anc2vec_neighbor_cos", + "anc2vec_neighbor_maxcos", + "anc2vec_has_emb", + "anc2vec_query_known_cos", + "anc2vec_query_known_maxcos", + "anc2vec_query_known_count", +) + +_TAX_VOTERS_COLS = ( + "tax_voters_same_frac", + "tax_voters_close_frac", + "tax_voters_mean_common_ancestors", +) + +_EMB_PCA_COLS = tuple(f"emb_pca_query_{i}" for i in range(16)) + + +def upgrade() -> None: + for col in (*_ANC2VEC_COLS, *_TAX_VOTERS_COLS, *_EMB_PCA_COLS): + op.add_column("go_prediction", sa.Column(col, sa.Float(), nullable=True)) + + +def downgrade() -> None: + for col in reversed((*_ANC2VEC_COLS, *_TAX_VOTERS_COLS, *_EMB_PCA_COLS)): + op.drop_column("go_prediction", col) diff --git a/alembic/versions/b1a1f4ec0e42_sequence_embedding_to_halfvec.py b/alembic/versions/b1a1f4ec0e42_sequence_embedding_to_halfvec.py new file mode 100644 index 0000000..e8927eb --- /dev/null +++ b/alembic/versions/b1a1f4ec0e42_sequence_embedding_to_halfvec.py @@ -0,0 +1,54 @@ +"""migrate sequence_embedding.embedding from vector to halfvec + +Revision ID: b1a1f4ec0e42 +Revises: f7a004f5f2c7 +Create Date: 2026-04-14 22:00:00.000000 +""" +from __future__ import annotations + +from alembic import op + +revision: str = "b1a1f4ec0e42" +down_revision: str = "f7a004f5f2c7" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.execute( + """ + DO $$ + BEGIN + IF ( + SELECT udt_name + FROM information_schema.columns + WHERE table_name = 'sequence_embedding' + AND column_name = 'embedding' + ) = 'vector' THEN + ALTER TABLE sequence_embedding + ALTER COLUMN embedding TYPE halfvec + USING embedding::halfvec; + END IF; + END $$; + """ + ) + + +def downgrade() -> None: + 
op.execute( + """ + DO $$ + BEGIN + IF ( + SELECT udt_name + FROM information_schema.columns + WHERE table_name = 'sequence_embedding' + AND column_name = 'embedding' + ) = 'halfvec' THEN + ALTER TABLE sequence_embedding + ALTER COLUMN embedding TYPE vector + USING embedding::vector; + END IF; + END $$; + """ + ) diff --git a/alembic/versions/b2c3d4e5f6a7_add_embedding_config_display_metadata.py b/alembic/versions/b2c3d4e5f6a7_add_embedding_config_display_metadata.py new file mode 100644 index 0000000..b2ff521 --- /dev/null +++ b/alembic/versions/b2c3d4e5f6a7_add_embedding_config_display_metadata.py @@ -0,0 +1,40 @@ +"""add display metadata columns to embedding_config + +Revision ID: b2c3d4e5f6a7 +Revises: 3505bfa74df6 +Create Date: 2026-04-10 + +Adds three nullable columns to ``embedding_config`` so the benchmark UI can +show a human-readable label, a family tag, and the approximate parameter +count without having to infer everything from the raw HuggingFace +``model_name`` at render time. + +All columns are nullable — existing rows can be backfilled later with +``UPDATE embedding_config SET display_name = ..., family = ..., param_count = ...`` +or left as NULL (the router falls back to the Python-side derivation). +""" +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = 'b2c3d4e5f6a7' +down_revision: str | Sequence[str] | None = '3505bfa74df6' +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.add_column('embedding_config', sa.Column('display_name', sa.String(), nullable=True)) + op.add_column('embedding_config', sa.Column('family', sa.String(), nullable=True)) + op.add_column('embedding_config', sa.Column('param_count', sa.BigInteger(), nullable=True)) + + +def downgrade() -> None: + """Downgrade schema.""" + op.drop_column('embedding_config', 'param_count') + op.drop_column('embedding_config', 'family') + op.drop_column('embedding_config', 'display_name') diff --git a/alembic/versions/c4d5e6f7a8b9_add_taxonomy_to_query_set_entry.py b/alembic/versions/c4d5e6f7a8b9_add_taxonomy_to_query_set_entry.py new file mode 100644 index 0000000..438cda9 --- /dev/null +++ b/alembic/versions/c4d5e6f7a8b9_add_taxonomy_to_query_set_entry.py @@ -0,0 +1,43 @@ +"""add taxonomy_id and species to query_set_entry + +Revision ID: c4d5e6f7a8b9 +Revises: b2c3d4e5f6a7 +Create Date: 2026-04-11 + +Adds two nullable columns to ``query_set_entry`` so user-uploaded FASTA +sequences can carry their UniProt header taxonomy (``OX=`` / ``OS=``) even +when the accession is not present in the ``protein`` table and therefore +has no ``ProteinUniProtMetadata`` counterpart. + +The populating helper lives in ``protea.api.routers.query_sets`` and is a +silent no-op for non-UniProt headers. +""" +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = 'c4d5e6f7a8b9' +down_revision: str | Sequence[str] | None = 'b2c3d4e5f6a7' +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.add_column('query_set_entry', sa.Column('taxonomy_id', sa.Integer(), nullable=True)) + op.add_column('query_set_entry', sa.Column('species', sa.String(), nullable=True)) + op.create_index( + 'ix_query_set_entry_taxonomy_id', + 'query_set_entry', + ['taxonomy_id'], + ) + + +def downgrade() -> None: + """Downgrade schema.""" + op.drop_index('ix_query_set_entry_taxonomy_id', table_name='query_set_entry') + op.drop_column('query_set_entry', 'species') + op.drop_column('query_set_entry', 'taxonomy_id') diff --git a/alembic/versions/c517e16da06b_reranker_model_artifact_columns.py b/alembic/versions/c517e16da06b_reranker_model_artifact_columns.py new file mode 100644 index 0000000..eeeaf93 --- /dev/null +++ b/alembic/versions/c517e16da06b_reranker_model_artifact_columns.py @@ -0,0 +1,81 @@ +"""reranker_model_artifact_columns + +Revision ID: c517e16da06b +Revises: 7a2c9e1d0b33 +Create Date: 2026-04-21 02:57:27.951747 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = 'c517e16da06b' +down_revision: Union[str, Sequence[str], None] = '7a2c9e1d0b33' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.add_column('reranker_model', sa.Column('artifact_uri', sa.String(length=512), nullable=True)) + op.add_column('reranker_model', sa.Column('feature_schema_sha', sa.String(length=16), nullable=True)) + op.add_column('reranker_model', sa.Column('embedding_config_id', sa.UUID(), nullable=True)) + op.add_column('reranker_model', sa.Column('ontology_snapshot_id', sa.UUID(), nullable=True)) + op.add_column('reranker_model', sa.Column('producer_version', sa.String(length=64), nullable=True)) + op.add_column('reranker_model', sa.Column('producer_git_sha', sa.String(length=40), nullable=True)) + op.add_column('reranker_model', sa.Column('spec_yaml', sa.Text(), nullable=True)) + # model_data goes nullable so new rows can live exclusively by reference + # (artifact_uri). Downgrade restores NOT NULL — will fail loudly if any + # row has a NULL model_data, which is the correct behavior. 
+ op.alter_column( + 'reranker_model', 'model_data', + existing_type=sa.TEXT(), + nullable=True, + ) + op.create_index( + op.f('ix_reranker_model_embedding_config_id'), + 'reranker_model', ['embedding_config_id'], unique=False, + ) + op.create_index( + op.f('ix_reranker_model_ontology_snapshot_id'), + 'reranker_model', ['ontology_snapshot_id'], unique=False, + ) + op.create_foreign_key( + 'fk_reranker_model_ontology_snapshot_id', + 'reranker_model', 'ontology_snapshot', + ['ontology_snapshot_id'], ['id'], ondelete='SET NULL', + ) + op.create_foreign_key( + 'fk_reranker_model_embedding_config_id', + 'reranker_model', 'embedding_config', + ['embedding_config_id'], ['id'], ondelete='SET NULL', + ) + + +def downgrade() -> None: + """Downgrade schema.""" + op.drop_constraint( + 'fk_reranker_model_embedding_config_id', + 'reranker_model', type_='foreignkey', + ) + op.drop_constraint( + 'fk_reranker_model_ontology_snapshot_id', + 'reranker_model', type_='foreignkey', + ) + op.drop_index(op.f('ix_reranker_model_ontology_snapshot_id'), table_name='reranker_model') + op.drop_index(op.f('ix_reranker_model_embedding_config_id'), table_name='reranker_model') + op.alter_column( + 'reranker_model', 'model_data', + existing_type=sa.TEXT(), + nullable=False, + ) + op.drop_column('reranker_model', 'spec_yaml') + op.drop_column('reranker_model', 'producer_git_sha') + op.drop_column('reranker_model', 'producer_version') + op.drop_column('reranker_model', 'ontology_snapshot_id') + op.drop_column('reranker_model', 'embedding_config_id') + op.drop_column('reranker_model', 'feature_schema_sha') + op.drop_column('reranker_model', 'artifact_uri') diff --git a/alembic/versions/c7bab0210568_add_dataset_table.py b/alembic/versions/c7bab0210568_add_dataset_table.py new file mode 100644 index 0000000..c9791bc --- /dev/null +++ b/alembic/versions/c7bab0210568_add_dataset_table.py @@ -0,0 +1,67 @@ +"""add dataset table + +Revision ID: c7bab0210568 +Revises: c517e16da06b +Create Date: 2026-04-21 
20:45:37.964428 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = 'c7bab0210568' +down_revision: Union[str, Sequence[str], None] = 'c517e16da06b' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('dataset', + sa.Column('id', sa.UUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('operation', sa.String(length=64), nullable=False), + sa.Column('job_id', sa.UUID(), nullable=True), + sa.Column('storage_backend', sa.String(length=32), nullable=False), + sa.Column('key_prefix', sa.String(length=512), nullable=False), + sa.Column('train_uri', sa.String(length=1024), nullable=True), + sa.Column('eval_uri', sa.String(length=1024), nullable=True), + sa.Column('manifest_uri', sa.String(length=1024), nullable=False), + sa.Column('schema_sha', sa.String(length=16), nullable=False), + sa.Column('manifest_sha', sa.String(length=64), nullable=True), + sa.Column('n_train_rows', sa.BigInteger(), nullable=False), + sa.Column('n_eval_rows', sa.BigInteger(), nullable=False), + sa.Column('k', sa.Integer(), nullable=False), + sa.Column('annotation_source', sa.String(length=32), nullable=False), + sa.Column('embedding_config_id', sa.UUID(), nullable=True), + sa.Column('ontology_snapshot_id', sa.UUID(), nullable=True), + sa.Column('train_snapshot_pairs', postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column('eval_snapshot_pair', sa.String(length=64), nullable=True), + sa.Column('producer_version', sa.String(length=64), nullable=True), + sa.Column('producer_git_sha', sa.String(length=40), nullable=True), + sa.Column('meta', postgresql.JSONB(astext_type=sa.Text()), nullable=False), + 
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False), + sa.ForeignKeyConstraint(['embedding_config_id'], ['embedding_config.id'], ondelete='SET NULL'), + sa.ForeignKeyConstraint(['job_id'], ['job.id'], ondelete='SET NULL'), + sa.ForeignKeyConstraint(['ontology_snapshot_id'], ['ontology_snapshot.id'], ondelete='SET NULL'), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('name') + ) + op.create_index(op.f('ix_dataset_embedding_config_id'), 'dataset', ['embedding_config_id'], unique=False) + op.create_index(op.f('ix_dataset_job_id'), 'dataset', ['job_id'], unique=False) + op.create_index(op.f('ix_dataset_ontology_snapshot_id'), 'dataset', ['ontology_snapshot_id'], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_dataset_ontology_snapshot_id'), table_name='dataset') + op.drop_index(op.f('ix_dataset_job_id'), table_name='dataset') + op.drop_index(op.f('ix_dataset_embedding_config_id'), table_name='dataset') + op.drop_table('dataset') + # ### end Alembic commands ### diff --git a/alembic/versions/e037f3ae9f58_link_reranker_model_to_dataset.py b/alembic/versions/e037f3ae9f58_link_reranker_model_to_dataset.py new file mode 100644 index 0000000..80a5202 --- /dev/null +++ b/alembic/versions/e037f3ae9f58_link_reranker_model_to_dataset.py @@ -0,0 +1,38 @@ +"""link reranker_model to dataset + +Revision ID: e037f3ae9f58 +Revises: c7bab0210568 +Create Date: 2026-04-21 20:50:32.983265 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = 'e037f3ae9f58' +down_revision: Union[str, Sequence[str], None] = 'c7bab0210568' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('reranker_model', sa.Column('dataset_id', sa.UUID(), nullable=True)) + op.add_column('reranker_model', sa.Column('external_source', sa.String(length=128), nullable=True)) + op.create_index(op.f('ix_reranker_model_dataset_id'), 'reranker_model', ['dataset_id'], unique=False) + op.create_foreign_key(None, 'reranker_model', 'dataset', ['dataset_id'], ['id'], ondelete='SET NULL') + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint(None, 'reranker_model', type_='foreignkey') + op.drop_index(op.f('ix_reranker_model_dataset_id'), table_name='reranker_model') + op.drop_column('reranker_model', 'external_source') + op.drop_column('reranker_model', 'dataset_id') + # ### end Alembic commands ### diff --git a/alembic/versions/f7a004f5f2c7_add_visitor_events.py b/alembic/versions/f7a004f5f2c7_add_visitor_events.py new file mode 100644 index 0000000..96f90f9 --- /dev/null +++ b/alembic/versions/f7a004f5f2c7_add_visitor_events.py @@ -0,0 +1,40 @@ +"""add visitor_event table + +Revision ID: f7a004f5f2c7 +Revises: c4d5e6f7a8b9 +Create Date: 2026-04-12 20:50:00.000000 +""" +from __future__ import annotations + +import sqlalchemy as sa + +from alembic import op + +revision: str = "f7a004f5f2c7" +down_revision: str = "c4d5e6f7a8b9" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.create_table( + "visitor_event", + sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False), + sa.Column("day", sa.Date(), nullable=False), + sa.Column("visitor_hash", sa.String(length=16), nullable=False), + 
sa.Column("path", sa.String(length=255), nullable=False), + sa.Column("method", sa.String(length=8), nullable=False), + sa.Column("status", sa.Integer(), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index("ix_visitor_event_day_hash", "visitor_event", ["day", "visitor_hash"]) + op.create_index("ix_visitor_event_created_at", "visitor_event", ["created_at"]) + op.create_index("ix_visitor_event_path", "visitor_event", ["path"]) + + +def downgrade() -> None: + op.drop_index("ix_visitor_event_path", table_name="visitor_event") + op.drop_index("ix_visitor_event_created_at", table_name="visitor_event") + op.drop_index("ix_visitor_event_day_hash", table_name="visitor_event") + op.drop_table("visitor_event") diff --git a/protea/api/schemas/__init__.py b/apps/lafa_container/__init__.py similarity index 100% rename from protea/api/schemas/__init__.py rename to apps/lafa_container/__init__.py diff --git a/apps/lafa_container/protea_main.py b/apps/lafa_container/protea_main.py new file mode 100644 index 0000000..1eb70fb --- /dev/null +++ b/apps/lafa_container/protea_main.py @@ -0,0 +1,237 @@ +"""LAFA-compatible PROTEA wrapper. + +Entry point that honours the LAFA container CLI contract: + + --query_file FASTA of query sequences + --train_sequences FASTA of training sequences + --annot_file TSV (EntryID, term, aspect) of training annotations + --graph go-basic.obo (currently unused; kept for contract parity) + --output_baseline 3-column TSV output (Query_ID, GO_Term, Score) + +Pipeline: + 1. Mean-pool ProtT5 embeddings for queries and refs (``prott5_encoder``). + 2. Cosine KNN via ``protea.core.knn_search.search_knn`` (numpy backend). + 3. First-hit GO transfer per query (matches PROTEA's ``_predict_batch``). + 4. Score = ``1 - distance`` (cosine, in [0, 1]). + 5. Emit ``\\t\\t``; gzipped if ``--output_baseline`` + ends in ``.gz``. + +Smoke-test focus: integration over fidelity. 
The ontology graph is accepted +but not consulted — LAFA distributes propagated TSVs in the official splits. +""" + +from __future__ import annotations + +import argparse +import csv +import gzip +import os +import sys +from collections import defaultdict +from pathlib import Path +from typing import Iterator + +import numpy as np + +# Make `protea.core.knn_search` importable when running from a checkout. +_REPO_ROOT = Path(__file__).resolve().parents[2] +if str(_REPO_ROOT) not in sys.path: + sys.path.insert(0, str(_REPO_ROOT)) + +from protea.core.knn_search import search_knn # noqa: E402 + +from prott5_encoder import embed_sequences, fasta_accessions, parse_fasta # noqa: E402 + + +def _open_text(path: str): + return gzip.open(path, "rt") if path.endswith(".gz") else open(path) + + +def _load_annotations(path: str, ref_accessions: set[str]) -> dict[str, list[str]]: + """Return ``{ref_accession: [go_term, ...]}`` filtered to refs we use. + + Dispatches by extension: ``.gaf[.gz]`` → GAF parser (skipping ``NOT`` + qualifiers and ``!`` headers); anything else → TSV with ``EntryID`` / + ``term`` columns in the header. + """ + base = path[:-3] if path.endswith(".gz") else path + if base.endswith(".gaf"): + return _load_annotations_gaf(path, ref_accessions) + return _load_annotations_tsv(path, ref_accessions) + + +def _load_annotations_tsv(path: str, ref_accessions: set[str]) -> dict[str, list[str]]: + go_map: dict[str, list[str]] = defaultdict(list) + with _open_text(path) as handle: + header = handle.readline().rstrip("\n").split("\t") + try: + entry_idx = header.index("EntryID") + term_idx = header.index("term") + except ValueError: + print( + f"[protea_main] Annotation TSV must have header with 'EntryID' and 'term'. 
" + f"Got: {header}", + file=sys.stderr, + ) + sys.exit(1) + for line in handle: + cols = line.rstrip("\n").split("\t") + if len(cols) <= max(entry_idx, term_idx): + continue + acc = cols[entry_idx] + term = cols[term_idx] + if acc in ref_accessions: + go_map[acc].append(term) + return go_map + + +def _load_annotations_gaf(path: str, ref_accessions: set[str]) -> dict[str, list[str]]: + """Parse a GAF 2.x file. Cols: 2=DB_Object_ID, 5=GO_ID, 4=Qualifier.""" + go_map: dict[str, list[str]] = defaultdict(list) + with _open_text(path) as handle: + for raw in handle: + if raw.startswith("!"): + continue + cols = raw.rstrip("\n").split("\t") + if len(cols) < 9: + continue + if "NOT" in cols[3]: + continue + acc = cols[1] + term = cols[4] + if acc in ref_accessions: + go_map[acc].append(term) + return go_map + + +def _open_output(path: str): + if path.endswith(".gz"): + return gzip.open(path, "wt", newline="") + return open(path, "w", newline="") + + +def _stack(embeddings: dict[str, np.ndarray], order: list[str]) -> tuple[np.ndarray, list[str]]: + """Stack embeddings in ``order``, dropping accessions that failed to embed.""" + kept_accs: list[str] = [] + rows: list[np.ndarray] = [] + for acc in order: + vec = embeddings.get(acc) + if vec is None: + continue + kept_accs.append(acc) + rows.append(vec) + if not rows: + return np.empty((0, 0), dtype=np.float32), kept_accs + return np.stack(rows).astype(np.float32, copy=False), kept_accs + + +def _transfer( + query_accs: list[str], + neighbors: list[list[tuple[str, float]]], + go_map: dict[str, list[str]], + *, + keep_self_hits: bool, +) -> Iterator[tuple[str, str, float]]: + """First-hit GO transfer; one ``(query, term, score)`` row per (q, term).""" + for q_acc, top_refs in zip(query_accs, neighbors, strict=False): + seen: set[str] = set() + for ref_acc, distance in top_refs: + if not keep_self_hits and ref_acc == q_acc: + continue + score = max(0.0, 1.0 - float(distance)) + for term in go_map.get(ref_acc, ()): + if term in 
seen: + continue + seen.add(term) + yield q_acc, term, score + + +def main() -> None: + parser = argparse.ArgumentParser( + description="LAFA-compatible PROTEA KNN wrapper (ProtT5 + cosine KNN + first-hit transfer)." + ) + parser.add_argument("--query_file", "-q", required=True) + parser.add_argument("--train_sequences", required=True) + parser.add_argument("--annot_file", "-a", required=True) + parser.add_argument("--graph", required=True, help="OBO file (currently not consulted).") + parser.add_argument("--output_baseline", "-o", required=True) + parser.add_argument("--k", type=int, default=5, help="KNN neighbours per query (default: 5).") + parser.add_argument("--metric", default="cosine", choices=["cosine", "l2"]) + parser.add_argument("--backend", default="numpy", choices=["numpy", "faiss"]) + parser.add_argument( + "--keep_self_hits", + action="store_true", + help="Keep query==ref hits (default: drop, matching LAFA's prott5_container).", + ) + parser.add_argument( + "--model_dir", + default=os.environ.get("HF_CACHE"), + help="HuggingFace cache dir (default: $HF_CACHE).", + ) + args = parser.parse_args() + + for label, path in ( + ("query", args.query_file), + ("train", args.train_sequences), + ("annot", args.annot_file), + ("graph", args.graph), + ): + if not os.path.exists(path): + print(f"[protea_main] {label} file not found: {path}", file=sys.stderr) + sys.exit(1) + + print(f"[protea_main] reading FASTAs: {args.query_file} / {args.train_sequences}") + query_seqs = parse_fasta(args.query_file) + train_seqs = parse_fasta(args.train_sequences) + print(f"[protea_main] queries={len(query_seqs)} refs={len(train_seqs)}") + + print(f"[protea_main] loading annotations from {args.annot_file}") + go_map = _load_annotations(args.annot_file, set(train_seqs)) + refs_with_anns = [acc for acc in train_seqs if acc in go_map] + print(f"[protea_main] refs with annotations: {len(refs_with_anns)}/{len(train_seqs)}") + if not refs_with_anns: + print("[protea_main] no annotated 
refs after filter — nothing to transfer.", file=sys.stderr) + sys.exit(2) + + to_embed = {**{a: query_seqs[a] for a in query_seqs}, + **{a: train_seqs[a] for a in refs_with_anns}} + print(f"[protea_main] embedding {len(to_embed)} sequences with ProtT5 mean-pool") + embeddings = embed_sequences(to_embed, cache_dir=args.model_dir) + + query_order = fasta_accessions(args.query_file) + Q, kept_q = _stack(embeddings, query_order) + R, kept_r = _stack(embeddings, refs_with_anns) + print(f"[protea_main] embedding matrix Q={Q.shape} R={R.shape}") + if Q.size == 0 or R.size == 0: + print("[protea_main] empty embedding matrix — aborting.", file=sys.stderr) + sys.exit(3) + + print(f"[protea_main] KNN k={args.k} metric={args.metric} backend={args.backend}") + neighbors = search_knn( + Q, + R, + kept_r, + k=args.k, + metric=args.metric, + backend=args.backend, + ) + + out_path = args.output_baseline + out_dir = os.path.dirname(out_path) + if out_dir: + os.makedirs(out_dir, exist_ok=True) + + n_rows = 0 + with _open_output(out_path) as fh: + writer = csv.writer(fh, delimiter="\t") + for q_acc, term, score in _transfer( + kept_q, neighbors, go_map, keep_self_hits=args.keep_self_hits + ): + writer.writerow([q_acc, term, f"{score:.4f}"]) + n_rows += 1 + + print(f"[protea_main] wrote {n_rows} predictions to {out_path}") + + +if __name__ == "__main__": + main() diff --git a/apps/lafa_container/prott5_encoder.py b/apps/lafa_container/prott5_encoder.py new file mode 100644 index 0000000..a747bbb --- /dev/null +++ b/apps/lafa_container/prott5_encoder.py @@ -0,0 +1,139 @@ +"""Mean-pooled ProtT5 embedder for the LAFA wrapper. + +Standalone version of the encoder used by FANTASIA/PROTEA's ProtT5 backend, +trimmed to the needs of the LAFA contract (FASTA in, ``{accession: vector}`` +out). Mirrors the preprocessing of ``baselines/prott5_container/prott5_embedder.py`` +in the LAFA reference container so embeddings are bit-comparable. 
+""" + +from __future__ import annotations + +import os +import time +from typing import Iterable + +import numpy as np +import torch +from transformers import T5EncoderModel, T5Tokenizer + +_MODEL_NAME = "Rostlab/prot_t5_xl_half_uniref50-enc" + + +def _load_model(cache_dir: str | None) -> tuple[T5EncoderModel, T5Tokenizer, torch.device]: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + model = T5EncoderModel.from_pretrained(_MODEL_NAME, cache_dir=cache_dir) + if device.type == "cpu": + model = model.to(torch.float32) + model = model.to(device).eval() + tokenizer = T5Tokenizer.from_pretrained(_MODEL_NAME, do_lower_case=False, cache_dir=cache_dir) + return model, tokenizer, device + + +def _prepare(seq: str) -> str: + return " ".join(seq.replace("U", "X").replace("Z", "X").replace("O", "X")) + + +def embed_sequences( + sequences: dict[str, str], + *, + cache_dir: str | None = None, + max_residues: int = 4000, + max_seq_len: int = 1000, + max_batch: int = 100, +) -> dict[str, np.ndarray]: + """Return one mean-pooled vector per accession. + + Sorts sequences by descending length so short tails batch efficiently; + falls back to single-sequence processing for sequences > ``max_seq_len``. 
+ """ + if not sequences: + return {} + + model, tokenizer, device = _load_model(cache_dir) + + items = sorted(sequences.items(), key=lambda kv: -len(kv[1])) + embeddings: dict[str, np.ndarray] = {} + + start = time.time() + batch: list[tuple[str, str, int]] = [] + for idx, (acc, seq) in enumerate(items, 1): + prepared = _prepare(seq) + seq_len = len(seq) + batch.append((acc, prepared, seq_len)) + + n_res = sum(s_len for _, _, s_len in batch) + seq_len + flush = ( + len(batch) >= max_batch + or n_res >= max_residues + or idx == len(items) + or seq_len > max_seq_len + ) + if not flush: + continue + + accs, seqs, lens = zip(*batch) + batch = [] + + token_encoding = tokenizer.batch_encode_plus( + list(seqs), add_special_tokens=True, padding="longest" + ) + input_ids = torch.tensor(token_encoding["input_ids"]).to(device) + attention_mask = torch.tensor(token_encoding["attention_mask"]).to(device) + + try: + with torch.no_grad(): + hidden = model(input_ids, attention_mask=attention_mask).last_hidden_state + except RuntimeError as exc: + print(f"[prott5_encoder] OOM/error on batch with longest L={lens[0]}: {exc}") + continue + + for b_idx, ident in enumerate(accs): + s_len = lens[b_idx] + vec = hidden[b_idx, :s_len].mean(dim=0).detach().cpu().numpy().astype(np.float32) + embeddings[ident] = vec + + elapsed = time.time() - start + print( + f"[prott5_encoder] {len(embeddings)} embeddings in {elapsed:.1f}s " + f"({elapsed / max(1, len(embeddings)):.3f}s/protein, device={device})" + ) + return embeddings + + +def parse_fasta(path: str) -> dict[str, str]: + """Read a FASTA file into ``{accession: sequence}``. + + Accession is the substring between the first two ``|`` if present + (UniProt-style ``sp|P12345|name``), else the full id token. 
+ """ + seqs: dict[str, str] = {} + current: str | None = None + with open(path) as handle: + for raw in handle: + line = raw.strip() + if not line: + continue + if line.startswith(">"): + header = line[1:].split()[0] + parts = header.split("|") + current = parts[1] if len(parts) >= 2 else header + seqs[current] = "" + elif current is not None: + seqs[current] += line.upper().replace("-", "") + return seqs + + +def fasta_accessions(path: str) -> list[str]: + """Return accessions in FASTA order (stable for output ordering).""" + accs: list[str] = [] + with open(path) as handle: + for raw in handle: + if raw.startswith(">"): + header = raw[1:].strip().split()[0] + parts = header.split("|") + accs.append(parts[1] if len(parts) >= 2 else header) + return accs + + +def keys_as_array(seqs: Iterable[str]) -> list[str]: + return list(seqs) diff --git a/apps/web/app/[locale]/benchmark/page.tsx b/apps/web/app/[locale]/benchmark/page.tsx new file mode 100644 index 0000000..550b8b7 --- /dev/null +++ b/apps/web/app/[locale]/benchmark/page.tsx @@ -0,0 +1,591 @@ +"use client"; + +import { useEffect, useMemo, useState } from "react"; +import Link from "next/link"; +import { + getBenchmarkEmbeddings, + getBenchmarkMatrix, + type BenchmarkBestCell, + type BenchmarkEmbedding, + type BenchmarkEvalSet, + type BenchmarkMatrixResponse, + type BenchmarkRow, + type BenchmarkStage, +} from "../../../lib/api"; + +// ── Helpers ────────────────────────────────────────────────────────────── + +function formatParams(n: number | null): string { + if (n == null) return ""; + if (n >= 1_000_000_000) { + const v = n / 1_000_000_000; + return v >= 10 ? 
`${Math.round(v)}B` : `${v.toFixed(1)}B`; + } + if (n >= 1_000_000) return `${Math.round(n / 1_000_000)}M`; + return `${n}`; +} + +function formatProteins(n: number | undefined): string { + if (n == null) return ""; + if (n >= 1_000) return `${(n / 1_000).toFixed(1)}k`; + return String(n); +} + +function cellKey(eid: string, cat: string, asp: string): string { + return `${eid}|${cat}|${asp}`; +} + +/** Index rows by (embedding, cat, asp) for O(1) cell lookup. The matrix + * endpoint already dedupes to a single best row per tuple. */ +function indexRows(rows: BenchmarkRow[]): Map { + const out = new Map(); + for (const r of rows) { + out.set(cellKey(r.embedding_config_id, r.category, r.aspect), r); + } + return out; +} + +/** Index the leaderboard by (cat, asp) so the table can highlight winners. */ +function indexBestPerCell(cells: BenchmarkBestCell[]): Map { + const out = new Map(); + for (const c of cells) { + out.set(`${c.category}|${c.aspect}`, c); + } + return out; +} + +function stageLabel(stages: BenchmarkStage[], name: string): string { + return stages.find((s) => s.name === name)?.label ?? name; +} + +function evalSetLabel(evalSets: BenchmarkEvalSet[], id: string): string { + return evalSets.find((e) => e.id === id)?.label ?? `${id.slice(0, 8)}…`; +} + +/** Pick the initial stage once the catalog is loaded. Backend already + * returns stages sorted by YAML preferred_default_stages, so the first + * entry IS the preferred one if it has data. */ +function pickDefaultStage(stages: BenchmarkStage[]): string | null { + return stages.length > 0 ? stages[0].name : null; +} + +/** CSV export of the currently filtered rows — one line per cell. 
*/ +function rowsToCsv( + embeddings: BenchmarkEmbedding[], + rows: BenchmarkRow[], + stage: string, +): string { + const embById = new Map(embeddings.map((e) => [e.id, e])); + const header = [ + "display_name", + "family", + "param_count", + "model_name", + "stage", + "category", + "aspect", + "fmax", + "precision", + "recall", + "coverage", + "n_proteins", + "evaluation_set_id", + "evaluation_result_id", + ].join(","); + const lines = [header]; + for (const r of rows) { + if (r.stage !== stage) continue; + const e = embById.get(r.embedding_config_id); + lines.push( + [ + e?.display_name ?? "", + e?.family ?? "", + e?.param_count ?? "", + e?.model_name ?? "", + r.stage, + r.category, + r.aspect, + r.fmax, + r.precision ?? "", + r.recall ?? "", + r.coverage ?? "", + r.n_proteins ?? "", + r.evaluation_set_id, + r.evaluation_result_id, + ] + .map((v) => { + const s = String(v); + if (/[,"\n]/.test(s)) return `"${s.replace(/"/g, '""')}"`; + return s; + }) + .join(","), + ); + } + return lines.join("\n"); +} + +function downloadCsv(filename: string, content: string): void { + const blob = new Blob([content], { type: "text/csv;charset=utf-8" }); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); +} + +// ── Page ───────────────────────────────────────────────────────────────── + +export default function BenchmarkPage() { + const [embeddings, setEmbeddings] = useState(null); + const [matrix, setMatrix] = useState(null); + const [error, setError] = useState(null); + const [stage, setStage] = useState(null); + const [evalSetId, setEvalSetId] = useState("all"); + const [selectedK, setSelectedK] = useState(null); + + // Unfiltered catalog fetch — populates the full set of known stages and + // eval sets, so selector chips don't disappear when a filtered query + // returns zero rows. 
+ const [catalog, setCatalog] = useState<{ + stages: BenchmarkStage[]; + evalSets: BenchmarkEvalSet[]; + categories: string[]; + aspects: string[]; + ks: number[]; + }>({ stages: [], evalSets: [], categories: [], aspects: [], ks: [] }); + + useEffect(() => { + getBenchmarkMatrix() + .then((m) => { + setCatalog({ + stages: m.stages, + evalSets: m.evaluation_sets, + categories: m.categories, + aspects: m.aspects, + ks: m.ks ?? [], + }); + setStage((prev) => prev ?? pickDefaultStage(m.stages)); + setSelectedK((prev) => prev ?? (m.ks?.[0] ?? null)); + }) + .catch((e) => setError(e.message)); + }, []); + + useEffect(() => { + if (stage === null) return; + setError(null); + Promise.all([ + getBenchmarkEmbeddings(), + getBenchmarkMatrix({ + stage, + evaluation_set_id: evalSetId === "all" ? undefined : evalSetId, + k: selectedK ?? undefined, + }), + ]) + .then(([e, m]) => { + setEmbeddings(e.embeddings); + setMatrix(m); + }) + .catch((e) => setError(e.message)); + }, [stage, evalSetId, selectedK]); + + const rowIndex = useMemo( + () => (matrix ? indexRows(matrix.rows) : new Map()), + [matrix], + ); + + const bestPerCell = useMemo( + () => (matrix ? indexBestPerCell(matrix.best_per_cell) : new Map()), + [matrix], + ); + + const embeddingsWithData = useMemo(() => { + if (!embeddings || !matrix) return new Set(); + return new Set(matrix.embedding_config_ids); + }, [embeddings, matrix]); + + if (error) { + return ( +
+
+

{error}

+
+
+ ); + } + + if (!embeddings || !matrix || stage === null) { + return ( +
+
+
+
+ ); + } + + const hasData = matrix.rows.length > 0; + const stageList = catalog.stages.length > 0 ? catalog.stages : matrix.stages; + const evalSetList = catalog.evalSets.length > 0 ? catalog.evalSets : matrix.evaluation_sets; + const categories = catalog.categories.length > 0 ? catalog.categories : matrix.categories; + const aspects = catalog.aspects.length > 0 ? catalog.aspects : matrix.aspects; + const currentStageLabel = stageLabel(stageList, stage); + + // Active eval set banner: when "all" is selected and there's only one set, + // show that one; when a specific one is selected, show its full metadata. + const activeEvalSet = + evalSetId !== "all" + ? evalSetList.find((e) => e.id === evalSetId) ?? null + : evalSetList.length === 1 + ? evalSetList[0] + : null; + + return ( +
+ {/* Header */} +
+
+

Benchmark matrix

+

+ Per-embedding Fmax across categories and aspects for every evaluation + run in the database.{" "} + + Back to home + +

+
+
+ +
+
+ + {/* Eval set context banner */} + {activeEvalSet && ( +
+
+
+ + Evaluation split + +
+ {activeEvalSet.label} +
+
+
+ {activeEvalSet.stats.delta_proteins != null && ( + + Δ{" "} + + {activeEvalSet.stats.delta_proteins.toLocaleString()} + {" "} + proteins + + )} + {activeEvalSet.stats.nk_proteins != null && ( + + NK{" "} + + {formatProteins(activeEvalSet.stats.nk_proteins)} + + + )} + {activeEvalSet.stats.lk_proteins != null && ( + + LK{" "} + + {formatProteins(activeEvalSet.stats.lk_proteins)} + + + )} + {activeEvalSet.stats.pk_proteins != null && ( + + PK{" "} + + {formatProteins(activeEvalSet.stats.pk_proteins)} + + + )} + {activeEvalSet.new_obo_version && ( + + OBO{" "} + {activeEvalSet.new_obo_version} + + )} +
+
+
+ )} + + {/* Filters */} +
+
+ +
+ {stageList.map((s) => ( + + ))} +
+
+ + {catalog.ks.length > 0 && ( +
+ +
+ {catalog.ks.map((n) => ( + + ))} +
+
+ )} + + {evalSetList.length > 1 && ( +
+ + +
+ )} + +
+ {matrix.total} cells · {matrix.embedding_config_ids.length} embeddings ·{" "} + {matrix.evaluation_sets.length} eval set + {matrix.evaluation_sets.length === 1 ? "" : "s"} +
+
+ + {/* Leaderboard: best Fmax per (cat, asp) across every model & stage */} + {matrix.best_per_cell.length > 0 && ( +
+
+

+ Best Fmax per cell + + across every model in current stage filter + +

+
+
+ + + + + {aspects.map((asp) => ( + + ))} + + + + {categories.map((cat) => ( + + + {aspects.map((asp) => { + const best = bestPerCell.get(`${cat}|${asp}`); + if (!best) { + return ( + + ); + } + const emb = embeddings.find((e) => e.id === best.embedding_config_id); + return ( + + ); + })} + + ))} + +
+ {asp} +
{cat} + — + +
+ {best.fmax.toFixed(3)} +
+
+ {emb?.display_name ?? "—"} +
+
+ {stageLabel(stageList, best.stage)} +
+
+
+
+ )} + + {/* Matrix table */} + {!hasData ? ( +
+

+ No evaluation results for{" "} + {currentStageLabel} yet. +

+

+ Run run_cafa_evaluation for an embedding to populate + this cell of the matrix. +

+
+ ) : ( +
+ + + + + {categories.map((cat) => ( + + ))} + + + {categories.flatMap((cat) => + aspects.map((asp) => ( + + )), + )} + + + + {embeddings.map((emb) => { + const hasRow = embeddingsWithData.has(emb.id); + return ( + + + {categories.flatMap((cat) => + aspects.map((asp) => { + const row = rowIndex.get(cellKey(emb.id, cat, asp)); + const best = bestPerCell.get(`${cat}|${asp}`); + const isWinner = + row && best && row.evaluation_result_id === best.evaluation_result_id; + return ( + + ); + }), + )} + + ); + })} + +
+ Embedding + + {cat} +
+ {asp} +
+
+ {emb.display_name} +
+
+ {emb.family} + {emb.param_count != null + ? ` · ${formatParams(emb.param_count)}` + : ""} +
+
+ {row ? ( + + {row.fmax.toFixed(3)} + + ) : ( + + )} +
+
+ )} + +

+ Display names and stage labels come from{" "} + embedding_config (DB) and{" "} + protea/config/benchmark.yaml. Edit the YAML to change + ordering, labels, or the baseline tag. +

+
+ ); +} diff --git a/apps/web/app/[locale]/embeddings/page.tsx b/apps/web/app/[locale]/embeddings/page.tsx index d3411d0..a45eaee 100644 --- a/apps/web/app/[locale]/embeddings/page.tsx +++ b/apps/web/app/[locale]/embeddings/page.tsx @@ -41,6 +41,10 @@ const MODEL_PRESETS: Record = { { value: "Rostlab/prot_t5_xl_half_uniref50-enc", label: "ProT5-XL half (FP16 encoder)", layers: 24, defaultMaxLength: 1024 }, { value: "Rostlab/ProstT5", label: "ProstT5 (3Di + AA)", layers: 24, defaultMaxLength: 1024 }, ], + ankh: [ + { value: "ElnaggarLab/ankh-base", label: "Ankh base (~450M, 48 layers, d=768)", layers: 48, defaultMaxLength: 1024 }, + { value: "ElnaggarLab/ankh-large", label: "Ankh large (~1.9B, 48 layers, d=1536)", layers: 48, defaultMaxLength: 1024 }, + ], auto: [ { value: "facebook/esm2_t33_650M_UR50D", label: "ESM-2 650M (auto backend)", layers: 33, defaultMaxLength: 1022 }, ], @@ -91,7 +95,7 @@ export default function EmbeddingsPage() { const [cmpConfigId, setCmpConfigId] = useState(""); const [cmpQuerySetId, setCmpQuerySetId] = useState(""); const [cmpQueueBatchSize, setCmpQueueBatchSize] = useState(100); - const [cmpBatchSize, setCmpBatchSize] = useState(8); + const [cmpBatchSize, setCmpBatchSize] = useState(1); const [cmpDevice, setCmpDevice] = useState("cuda"); const [cmpSkipExisting, setCmpSkipExisting] = useState(true); const [cmpResult, setCmpResult] = useState<{ id: string; status: string } | null>(null); @@ -306,6 +310,7 @@ export default function EmbeddingsPage() { +
diff --git a/apps/web/app/[locale]/evaluation/page.tsx b/apps/web/app/[locale]/evaluation/page.tsx index 25af270..f7ce0fb 100644 --- a/apps/web/app/[locale]/evaluation/page.tsx +++ b/apps/web/app/[locale]/evaluation/page.tsx @@ -274,6 +274,7 @@ function EvaluationSetCard({ const MAX_ATTEMPTS = 30; const interval = setInterval(async () => { + if (typeof document !== "undefined" && document.visibilityState === "hidden") return; attempts++; try { const fresh = await listResults(e.id); diff --git a/apps/web/app/[locale]/jobs/[id]/page.tsx b/apps/web/app/[locale]/jobs/[id]/page.tsx index 7d1d68f..08ec6bf 100644 --- a/apps/web/app/[locale]/jobs/[id]/page.tsx +++ b/apps/web/app/[locale]/jobs/[id]/page.tsx @@ -72,6 +72,7 @@ export default function JobDetail({ params }: { params: Promise<{ id: string }> const intervalRef = useRef | null>(null); async function refresh() { + if (typeof document !== "undefined" && document.visibilityState === "hidden") return; try { setError(""); const [j, ev, ch] = await Promise.all([ @@ -98,16 +99,24 @@ export default function JobDetail({ params }: { params: Promise<{ id: string }> refresh(); }, [jobId]); - // Auto-refresh while job is active + // Auto-refresh while job is active. Pauses when the tab is hidden and + // resumes on visibilitychange — avoids burning bandwidth on background tabs. 
useEffect(() => { if (!job) return; const isTerminal = TERMINAL.includes(String(job.status).toLowerCase()); - if (!isTerminal) { - intervalRef.current = setInterval(refresh, 2000); - } else { + if (isTerminal) { if (intervalRef.current) clearInterval(intervalRef.current); + return; } - return () => { if (intervalRef.current) clearInterval(intervalRef.current); }; + intervalRef.current = setInterval(refresh, 2000); + const onVisibility = () => { + if (document.visibilityState === "visible") refresh(); + }; + document.addEventListener("visibilitychange", onVisibility); + return () => { + if (intervalRef.current) clearInterval(intervalRef.current); + document.removeEventListener("visibilitychange", onVisibility); + }; }, [job?.status]); async function onDelete() { @@ -186,6 +195,15 @@ export default function JobDetail({ params }: { params: Promise<{ id: string }> {jobId}
+ {job.operation_description && ( +

{job.operation_description}

+ )} + {job.operation_summary && ( +

+ {job.operation_summary} +

+ )} +
{t("jobDetail.queue")} {job.queue_name}
{t("jobDetail.created")} {formatDate(job.created_at)}
diff --git a/apps/web/app/[locale]/jobs/page.tsx b/apps/web/app/[locale]/jobs/page.tsx index 7eff46a..3138a81 100644 --- a/apps/web/app/[locale]/jobs/page.tsx +++ b/apps/web/app/[locale]/jobs/page.tsx @@ -57,6 +57,7 @@ export default function JobsPage() { const intervalRef = useRef | null>(null); async function refresh(status = statusFilter, showLoader = false) { + if (!showLoader && typeof document !== "undefined" && document.visibilityState === "hidden") return; if (showLoader) setLoading(true); try { setError(""); @@ -75,7 +76,9 @@ export default function JobsPage() { refresh(statusFilter, true); }, [statusFilter]); - // Auto-refresh: faster when there are active jobs, slower otherwise + // Auto-refresh: faster when there are active jobs, slower otherwise. + // Pauses automatically when the tab is hidden (refresh() checks + // document.visibilityState) and forces a refresh on visibilitychange. useEffect(() => { if (!autoRefresh) { if (intervalRef.current) clearInterval(intervalRef.current); @@ -86,7 +89,14 @@ export default function JobsPage() { return hasActive ? 3000 : 8000; } intervalRef.current = setInterval(() => refresh(), schedule()); - return () => { if (intervalRef.current) clearInterval(intervalRef.current); }; + const onVisibility = () => { + if (document.visibilityState === "visible") refresh(); + }; + document.addEventListener("visibilitychange", onVisibility); + return () => { + if (intervalRef.current) clearInterval(intervalRef.current); + document.removeEventListener("visibilitychange", onVisibility); + }; }, [autoRefresh, statusFilter, jobs]); const activeCount = jobs.filter((j) => j.status === "running" || j.status === "queued").length; @@ -157,6 +167,12 @@ export default function JobsPage() { {formatDate(j.created_at)}

{j.operation}

+ {j.operation_description && ( +

{j.operation_description}

+ )} + {j.operation_summary && ( +

{j.operation_summary}

+ )}

{j.id}

@@ -165,10 +181,10 @@ export default function JobsPage() { {/* Desktop table */}
-
+
{t("status")}
{t("operation")}
-
{t("jobId")}
+
{t("operationContext")}
{t("created")}
@@ -180,14 +196,24 @@ export default function JobsPage() {
- {j.operation} + {j.operation} + {j.operation_description && ( + {j.operation_description} + )}
-
{j.id}
+
+ {j.operation_summary ? ( + {j.operation_summary} + ) : ( + + )} + {j.id} +
{formatDate(j.created_at)}
))} diff --git a/apps/web/app/[locale]/layout.tsx b/apps/web/app/[locale]/layout.tsx index 1fa1b1e..6f39907 100644 --- a/apps/web/app/[locale]/layout.tsx +++ b/apps/web/app/[locale]/layout.tsx @@ -29,8 +29,11 @@ export default async function LocaleLayout({ const { locale } = await params; const messages = await getMessages(); return ( - - + + diff --git a/apps/web/app/[locale]/page.tsx b/apps/web/app/[locale]/page.tsx index 9f3f9fd..120ae9d 100644 --- a/apps/web/app/[locale]/page.tsx +++ b/apps/web/app/[locale]/page.tsx @@ -8,11 +8,6 @@ import { getShowcase, type ShowcaseData } from "../../lib/api"; import { AnnotateForm } from "../../components/AnnotateForm"; const ASPECTS = ["MFO", "BPO", "CCO"] as const; -const ASPECT_COLORS: Record = { - MFO: "blue", - BPO: "green", - CCO: "purple", -}; const ASPECT_LABELS: Record = { MFO: "Molecular Function", BPO: "Biological Process", @@ -26,12 +21,6 @@ const CATEGORY_LABELS: Record = { PK: "Partial Knowledge", }; -const METHOD_KEYS: Record = { - knn_baseline: "knnBaseline", - knn_scored: "knnScored", - knn_reranker: "knnReranker", -}; - const STAGE_ICONS: Record = { sequences: "Aa", embeddings: "E", @@ -48,12 +37,30 @@ const STAGE_I18N: Record = { evaluations: "stageEvaluation", }; +const STAGE_LABELS: Record = { + baseline: "pipelineStageBaseline", + alignment_weighted: "pipelineStageAlignmentWeighted", + reranker: "pipelineStageReranker", +}; + +const STAGE_BADGE: Record = { + baseline: "bg-gray-100 text-gray-700", + alignment_weighted: "bg-amber-100 text-amber-800", + reranker: "bg-blue-100 text-blue-800", +}; + +function formatParamCount(n: number | null): string { + if (n == null) return ""; + if (n >= 1_000_000_000) return `${(n / 1_000_000_000).toFixed(n >= 10_000_000_000 ? 
0 : 1)}B`; + if (n >= 1_000_000) return `${Math.round(n / 1_000_000)}M`; + return `${n}`; +} + export default function HomePage() { const t = useTranslations("home"); const router = useRouter(); const [data, setData] = useState(null); const [error, setError] = useState(null); - const [activeCategory, setActiveCategory] = useState("NK"); useEffect(() => { getShowcase().then(setData).catch((e) => setError(e.message)); @@ -65,7 +72,12 @@ export default function HomePage() {

{error}

- ))} + {t(STAGE_LABELS[best.stage] as any)} + +
+
+ {best.embedding.model_name} +
+
+ +
+
+ {best.avg_fmax.toFixed(3)} +
+
{t("avgFmaxAcrossCells")}
- - {CATEGORY_LABELS[activeCategory]} -
- {/* ── Fmax cards ────────────────────────────────────────── */} -
+ {/* Per-aspect mini tiles (mean across NK/LK/PK) */} +
{ASPECTS.map((aspect) => { - const d = catFmax[aspect]; - if (!d) return null; - const color = ASPECT_COLORS[aspect]; + const agg = perAspect[aspect]; + const value = agg ? agg.sum / agg.count : null; return (
-
- {d.fmax.toFixed(2)} +
+ {value != null ? value.toFixed(3) : "—"}
-
- {t("fmax")} {aspect} -
-
- {ASPECT_LABELS[aspect]} -
-
- {d.method_label} +
+ {aspect}
); })}
- - - {/* ── Method comparison table ───────────────────────────── */} - {catMethods.length > 0 && ( -
-

- {t("methodComparison")} - - ({activeCategory}) - -

-
- - - - - {ASPECTS.map((a) => ( - - ))} - - - - {catMethods.map((row, i) => { - const isBest = ASPECTS.some( - (a) => catFmax[a]?.method === row.method - ); - return ( - - - {ASPECTS.map((aspect) => { - const val = (row as any)[aspect]?.fmax; - const baseVal = baseline ? (baseline as any)[aspect]?.fmax : null; - const delta = val != null && baseVal != null && row.method !== "knn_baseline" - ? val - baseVal - : null; - return ( - - ); - })} - - ); - })} - -
{t("method")} - {a} -
- {t(METHOD_KEYS[row.method] ?? row.method)} - {isBest && ( - best - )} - - {val != null ? ( - - {val.toFixed(3)} - {delta != null && ( - 0 ? "text-green-600" : delta < 0 ? "text-red-600" : "text-gray-400"}`}> - {delta > 0 ? "+" : ""}{delta.toFixed(3)} - - )} - - ) : ( - - )} -
-
-
- )} - +
+ ) : (

{t("noDataYet")}

@@ -309,12 +255,14 @@ export default function HomePage() { {t("stats")}
- {([ - ["proteins", data.counts.proteins], - ["sequences", data.counts.sequences], - ["embeddings", data.counts.embeddings], - ["predictions", data.counts.predictions], - ] as [string, number][]).map(([key, count]) => ( + {( + [ + ["proteins", data.counts.proteins], + ["sequences", data.counts.sequences], + ["embeddings", data.counts.embeddings], + ["predictions", data.counts.predictions], + ] as [string, number][] + ).map(([key, count]) => (
{count.toLocaleString()} @@ -328,7 +276,7 @@ export default function HomePage() { {/* ── CTAs ──────────────────────────────────────────────────── */}
{t("exploreResults")} diff --git a/apps/web/app/[locale]/reranker/page.tsx b/apps/web/app/[locale]/reranker/page.tsx index edf0751..1d1bd0c 100644 --- a/apps/web/app/[locale]/reranker/page.tsx +++ b/apps/web/app/[locale]/reranker/page.tsx @@ -201,28 +201,44 @@ function RerankerCard({
- AUC: {m.val_auc?.toFixed(4) ?? "—"} - F1: {m.val_f1?.toFixed(4) ?? "—"} - Precision: {m.val_precision?.toFixed(4) ?? "—"} - Recall: {m.val_recall?.toFixed(4) ?? "—"} - Positive rate: {m.positive_rate != null ? `${(m.positive_rate * 100).toFixed(2)}%` : "—"} + {m.test_fmax != null ? ( + <> + Test Fmax: {m.test_fmax.toFixed(4)} + Best iter: {m.best_iteration ?? "—"} + {m.positive_rate_train != null && ( + Train pos. rate: {(m.positive_rate_train * 100).toFixed(2)}% + )} + + ) : ( + <> + AUC: {m.val_auc?.toFixed(4) ?? "—"} + F1: {m.val_f1?.toFixed(4) ?? "—"} + Precision: {m.val_precision?.toFixed(4) ?? "—"} + Recall: {m.val_recall?.toFixed(4) ?? "—"} + Positive rate: {m.positive_rate != null ? `${(m.positive_rate * 100).toFixed(2)}%` : "—"} + + )}
{expanded && (
- {/* Validation metrics */} + {/* Training-time metrics */}
-

Validation metrics

+

Training-time metrics

+ + -
- Train samples: {m.train_samples?.toLocaleString()} - Val samples: {m.val_samples?.toLocaleString()} + {m.train_samples != null && Train samples: {m.train_samples.toLocaleString()}} + {m.val_samples != null && Val samples: {m.val_samples.toLocaleString()}} + {m.positive_rate_train != null && ( + Train positive rate: {(m.positive_rate_train * 100).toFixed(2)}% + )}
diff --git a/apps/web/components/AnnotateForm.tsx b/apps/web/components/AnnotateForm.tsx index e28e1cf..2c17cc1 100644 --- a/apps/web/components/AnnotateForm.tsx +++ b/apps/web/components/AnnotateForm.tsx @@ -7,23 +7,40 @@ import { annotateProteins, getJob, launchPredictGoTerms, + listJobs, listPredictionSets, type AnnotateResult, + type Job, } from "@/lib/api"; type Stage = "idle" | "uploading" | "embedding" | "predicting" | "done" | "error"; const POLL_MS = 3_000; +const QUEUE_POLL_MS = 30_000; -const EXAMPLE_FASTA = `>sp|P04637|P53_HUMAN Cellular tumor antigen p53 +// Operations that occupy the shared GPU pipeline. While any of these is +// queued or running we block new user annotation requests, since they won't +// actually enter the queue in a reasonable time frame. +const BLOCKING_OPERATIONS = new Set([ + "compute_embeddings", + "compute_embeddings_batch", + "predict_go_terms", + "predict_go_terms_batch", +]); + +const EXAMPLE_FASTA = `>sp|P01116|RASK_HUMAN GTPase KRas OS=Homo sapiens OX=9606 GN=KRAS PE=1 SV=1 +MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG +QEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDL +PSRTVDTKQAQDLARSYGIPFIETSAKTRQRVEDAFYTLVREIRQYRLKKISKEEKTPGC +VKIKKCIIM +>sp|P04637|P53_HUMAN Cellular tumor antigen p53 OS=Homo sapiens OX=9606 GN=TP53 PE=1 SV=4 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGP -DEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYPQGLNGTVNLPGRNSFEV -RVCACPGRDRRTEEENLHKTTGIDSFLHPEVEYFTPETDPAGPMCSRHFYQLAKTCPVQLW -VDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHERCTCGGNHGISTTTGICLICQFFLVHKP ->sp|P38398|BRCA1_HUMAN Breast cancer type 1 susceptibility protein -MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQC -PLCKNDITKRSLQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEV -SIIQSMGYRNRAKRLLQSEPENPSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELG`; +DEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAK +SVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHE 
+RCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNS +SCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELP +PGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG +GSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD`; export function AnnotateForm() { const t = useTranslations("home"); @@ -41,6 +58,11 @@ export function AnnotateForm() { // Drag-and-drop state const [dragOver, setDragOver] = useState(false); + // Queue-awareness: poll active jobs and block submission while any + // embedding/prediction operation is queued or running, because our + // single-GPU setup can't absorb another request in reasonable time. + const [blockingJobs, setBlockingJobs] = useState(null); + const handleFile = (file: File) => { const reader = new FileReader(); reader.onload = (e) => { @@ -152,7 +174,49 @@ export function AnnotateForm() { }; }, []); + // Poll for active embedding/prediction jobs to know whether the GPU + // pipeline is currently saturated. + useEffect(() => { + let cancelled = false; + const fetchBlocking = async () => { + if (typeof document !== "undefined" && document.visibilityState === "hidden") return; + try { + const [queued, running] = await Promise.all([ + listJobs({ limit: 100, status: "queued" }), + listJobs({ limit: 100, status: "running" }), + ]); + if (cancelled) return; + const merged = [...running, ...queued].filter((j) => + BLOCKING_OPERATIONS.has(j.operation), + ); + setBlockingJobs(merged); + } catch { + // ignore transient errors; keep prior state + } + }; + fetchBlocking(); + const id = setInterval(fetchBlocking, QUEUE_POLL_MS); + const onVisibility = () => { + if (document.visibilityState === "visible") fetchBlocking(); + }; + document.addEventListener("visibilitychange", onVisibility); + return () => { + cancelled = true; + clearInterval(id); + document.removeEventListener("visibilitychange", onVisibility); + }; + }, []); + const isRunning = stage === "uploading" || stage === "embedding" || stage === "predicting"; + // A running local 
annotation flow already owns the UI; don't double-block. + const isQueueBlocked = !isRunning && (blockingJobs?.length ?? 0) > 0; + const runningJob = blockingJobs?.find((j) => j.status === "running") ?? null; + const runningPct = + runningJob && runningJob.progress_total && runningJob.progress_current + ? Math.round((runningJob.progress_current / runningJob.progress_total) * 100) + : null; + const queuedCount = + blockingJobs?.filter((j) => j.status === "queued").length ?? 0; return (
@@ -163,6 +227,41 @@ export function AnnotateForm() { {t("annotateDescription" as any)}

+ {/* Queue-busy banner ─ blocks submission while the GPU pipeline is saturated */} + {isQueueBlocked && ( +
+
+ +
+

+ {t("annotateQueueBlockedTitle" as any)} +

+

+ {t("annotateQueueBlockedBody" as any)} +

+
    + {runningJob && ( +
  • + {runningJob.operation} + {" — "} + {t("annotateQueueRunningLabel" as any)} + {runningPct != null ? ` (${runningPct}%)` : ""} +
  • + )} + {queuedCount > 0 && ( +
  • + {t("annotateQueueWaitingLabel" as any)}: {queuedCount} +
  • + )} +
+
+
+
+ )} + {/* FASTA input */}
setFasta(e.target.value)} placeholder={t("annotatePlaceholder" as any)} rows={6} - disabled={isRunning} + disabled={isRunning || isQueueBlocked} className="w-full rounded-lg p-4 text-xs font-mono text-gray-700 placeholder:text-gray-400 focus:outline-none focus:ring-2 focus:ring-blue-300 resize-y disabled:opacity-50 disabled:cursor-not-allowed bg-transparent" /> - {!fasta && !isRunning && ( + {!fasta && !isRunning && !isQueueBlocked && (