sisap-challenges · jacketsj · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/README.md b/README.md
@@ -44,6 +44,15 @@ Run `./download_datasets.sh` to download all datasets. You can provide `--small-
 
 ## Running the Code
 The suggested approach is to run the the Docker container as detailed in `run_search.sh`.
+To run on a specific dataset, for example:
+```
+./run_search.sh wikipedia-small
+```
+You can also test without docker as follows (not recommended):
+```sh
+python search.py --input data/wikipedia-small/*.h5 --task-description data/wikipedia-small/config.json --output results/wikipedia-small/
+```
+Be warned that this will use all of your available cores, which may be undesirable on your local machine.
 
 ### Evaluation
 
@@ -52,7 +61,7 @@ python eval.py results.csv
 ```
 will produce a summary file of the results with the computed recall against the ground truth data. 
 
-This csv file can be further processed to create plots (using `python plot.py --task {task1, task2, task3} res.csv`) and show the fastest solutions above a certain recall threshold (using `python show_operating_points.py`).
+This csv file can be further processed to create plots (using `python plot.py --task {task1, task2, task3} res.csv` or `python plot.py --task {task1, task2, task3} --dataset {dataset_name}`) and show the fastest solutions above a certain recall threshold (using `python show_operating_points.py`).
 
 ## Task configuration format (`config.json`)
 

diff --git a/download_datasets.sh b/download_datasets.sh
@@ -10,9 +10,15 @@
 #                  Skips the large full-scale datasets (wikipedia ~15 GB, nq ~7 GB).
 #
 # After running this script every dataset is ready to use:
-#   python search.py --task task1 --dataset wikipedia-small
-#   python search.py --task task2 --dataset llama-dev
-#   python search.py --task task3 --dataset fiqa-dev
+#   python search.py --input data/task-1-spot-check/*.h5 --task-description data/task-1-spot-check/config.json --output results/task-1-spot-check/
+#   python search.py --input data/task-2-spot-check/*.h5 --task-description data/task-2-spot-check/config.json --output results/task-2-spot-check/
+#   python search.py --input data/task-3-spot-check/*.h5 --task-description data/task-3-spot-check/config.json --output results/task-3-spot-check/
+#   python search.py --input data/wikipedia-small/*.h5 --task-description data/wikipedia-small/config.json --output results/wikipedia-small/
+#   python search.py --input data/llama-dev/*.h5 --task-description data/llama-dev/config.json --output results/llama-dev/
+#   python search.py --input data/fiqa-dev/*.h5 --task-description data/fiqa-dev/config.json --output results/fiqa-dev/
+# Alternatively, use run_search.sh to run these with docker, e.g.:
+#   ./run_search.sh                     # run all three spot checks
+#   ./run_search.sh wikipedia-small     # run on wikipedia-small only
 
 set -euo pipefail
 
@@ -22,7 +28,7 @@ set -euo pipefail
 
 if ! command -v hf &>/dev/null; then
     echo "Error: hf command not found."
-    echo "Please install it with: pip install -U huggingface_hub[cli]"
+    echo "Please install it with: pip install -U huggingface_hub"
     exit 1
 fi
 
@@ -132,6 +138,12 @@ echo "    task-2-spot-check data/task-2-spot-check/"
 echo "    task-3-spot-check data/task-3-spot-check/"
 echo ""
 echo "Run search.py with any of these dataset names, e.g.:"
-echo "  python search.py --task task1 --dataset wikipedia-small"
-echo "  python search.py --task task2 --dataset llama-dev"
-echo "  python search.py --task task3 --dataset fiqa-dev"
+echo "  python search.py --input data/task-1-spot-check/*.h5 --task-description data/task-1-spot-check/config.json --output results/task-1-spot-check/"
+echo "  python search.py --input data/task-2-spot-check/*.h5 --task-description data/task-2-spot-check/config.json --output results/task-2-spot-check/"
+echo "  python search.py --input data/task-3-spot-check/*.h5 --task-description data/task-3-spot-check/config.json --output results/task-3-spot-check/"
+echo "  python search.py --input data/wikipedia-small/*.h5 --task-description data/wikipedia-small/config.json --output results/wikipedia-small/"
+echo "  python search.py --input data/llama-dev/*.h5 --task-description data/llama-dev/config.json --output results/llama-dev/"
+echo "  python search.py --input data/fiqa-dev/*.h5 --task-description data/fiqa-dev/config.json --output results/fiqa-dev/"
+echo "Alternatively, use run_search.sh to run these with docker, e.g.:"
+echo "  ./run_search.sh                     # run all three spot checks"
+echo "  ./run_search.sh wikipedia-small     # run on wikipedia-small only"
diff --git a/requirements.txt b/requirements.txt
@@ -4,4 +4,5 @@ h5py
 tqdm
 faiss-cpu
 matplotlib
-pandas
+pandas
+huggingface_hub
diff --git a/run_search.sh b/run_search.sh
@@ -1,6 +1,7 @@
-for task in 1 2 3; do
-    echo Running Task $task
-    mkdir -p results/task-$task-spot-check
+run_for_name() {
+    local name="$1"
+    echo "Running for dataset: $name"
+    mkdir -p "results/$name"
     docker run \
         --rm \
         --user "$(id -u):$(id -g)" \
@@ -11,5 +12,16 @@ for task in 1 2 3; do
         --volume $(pwd)/search.py:/app/search.py:ro \
         --volume $(pwd)/data:/app/data:ro \
         --volume $(pwd)/results:/app/results:rw \
-        sisap-baseline python search.py --input data/task-$task-spot-check/*.h5 --task-description data/task-$task-spot-check/config.json --output results/task-$task-spot-check/
-done
+        sisap-baseline python search.py \
+            --input "data/$name/"*.h5 \
+            --task-description "data/$name/config.json" \
+            --output "results/$name/"
+}
+
+if [ $# -eq 0 ]; then
+    for task in 1 2 3; do
+        run_for_name "task-$task-spot-check"
+    done
+else
+    run_for_name "$1"
+fi