diff --git a/README.md b/README.md index f672c71..0eb3f0e 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,15 @@ Run `./download_datasets.sh` to download all datasets. You can provide `--small- ## Running the Code The suggested approach is to run the the Docker container as detailed in `run_search.sh`. +To run on a specific dataset, for example: +``` +./run_search.sh wikipedia-small +``` +You can also test without docker as follows (not recommended): +```sh +python search.py --input data/wikipedia-small/*.h5 --task-description data/wikipedia-small/config.json --output results/wikipedia-small/ +``` +Be warned that this will use all of your available cores, which may be undesirable on your local machine. ### Evaluation @@ -52,7 +61,7 @@ python eval.py results.csv ``` will produce a summary file of the results with the computed recall against the ground truth data. -This csv file can be further processed to create plots (using `python plot.py --task {task1, task2, task3} res.csv`) and show the fastest solutions above a certain recall threshold (using `python show_operating_points.py`). +This csv file can be further processed to create plots (using `python plot.py --task {task1, task2, task3} res.csv` or `python plot.py --task {task1, task2, task3} --dataset {dataset_name}`) and show the fastest solutions above a certain recall threshold (using `python show_operating_points.py`). ## Task configuration format (`config.json`) diff --git a/download_datasets.sh b/download_datasets.sh index a1f7431..d357bc3 100755 --- a/download_datasets.sh +++ b/download_datasets.sh @@ -10,9 +10,15 @@ # Skips the large full-scale datasets (wikipedia ~15 GB, nq ~7 GB). 
# # After running this script every dataset is ready to use: -# python search.py --task task1 --dataset wikipedia-small -# python search.py --task task2 --dataset llama-dev -# python search.py --task task3 --dataset fiqa-dev +# python search.py --input data/task-1-spot-check/*.h5 --task-description data/task-1-spot-check/config.json --output results/task-1-spot-check/ +# python search.py --input data/task-2-spot-check/*.h5 --task-description data/task-2-spot-check/config.json --output results/task-2-spot-check/ +# python search.py --input data/task-3-spot-check/*.h5 --task-description data/task-3-spot-check/config.json --output results/task-3-spot-check/ +# python search.py --input data/wikipedia-small/*.h5 --task-description data/wikipedia-small/config.json --output results/wikipedia-small/ +# python search.py --input data/llama-dev/*.h5 --task-description data/llama-dev/config.json --output results/llama-dev/ +# python search.py --input data/fiqa-dev/*.h5 --task-description data/fiqa-dev/config.json --output results/fiqa-dev/ +# Alternatively, use run_search.sh to run these with docker, e.g.: +# ./run_search.sh # run all three spot checks +# ./run_search.sh wikipedia-small # run on wikipedia-small only set -euo pipefail @@ -22,7 +28,7 @@ set -euo pipefail if ! command -v hf &>/dev/null; then echo "Error: hf command not found." 
- echo "Please install it with: pip install -U huggingface_hub[cli]" + echo "Please install it with: pip install -U huggingface_hub" exit 1 fi @@ -132,6 +138,12 @@ echo " task-2-spot-check data/task-2-spot-check/" echo " task-3-spot-check data/task-3-spot-check/" echo "" echo "Run search.py with any of these dataset names, e.g.:" -echo " python search.py --task task1 --dataset wikipedia-small" -echo " python search.py --task task2 --dataset llama-dev" -echo " python search.py --task task3 --dataset fiqa-dev" +echo " python search.py --input data/task-1-spot-check/*.h5 --task-description data/task-1-spot-check/config.json --output results/task-1-spot-check/" +echo " python search.py --input data/task-2-spot-check/*.h5 --task-description data/task-2-spot-check/config.json --output results/task-2-spot-check/" +echo " python search.py --input data/task-3-spot-check/*.h5 --task-description data/task-3-spot-check/config.json --output results/task-3-spot-check/" +echo " python search.py --input data/wikipedia-small/*.h5 --task-description data/wikipedia-small/config.json --output results/wikipedia-small/" +echo " python search.py --input data/llama-dev/*.h5 --task-description data/llama-dev/config.json --output results/llama-dev/" +echo " python search.py --input data/fiqa-dev/*.h5 --task-description data/fiqa-dev/config.json --output results/fiqa-dev/" +echo "Alternatively, use run_search.sh to run these with docker, e.g.:" +echo " ./run_search.sh # run all three spot checks" +echo " ./run_search.sh wikipedia-small # run on wikipedia-small only" diff --git a/requirements.txt b/requirements.txt index 033b6b7..1919df9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ h5py tqdm faiss-cpu matplotlib -pandas \ No newline at end of file +pandas +huggingface_hub diff --git a/run_search.sh b/run_search.sh old mode 100644 new mode 100755 index a1601d2..0baa1d1 --- a/run_search.sh +++ b/run_search.sh @@ -1,6 +1,7 @@ -for task in 1 2 3; do - echo Running Task 
$task - mkdir -p results/task-$task-spot-check +run_for_name() { + local name="$1" + echo "Running for dataset: $name" + mkdir -p "results/$name" docker run \ --rm \ --user "$(id -u):$(id -g)" \ @@ -11,5 +12,16 @@ for task in 1 2 3; do --volume $(pwd)/search.py:/app/search.py:ro \ --volume $(pwd)/data:/app/data:ro \ --volume $(pwd)/results:/app/results:rw \ - sisap-baseline python search.py --input data/task-$task-spot-check/*.h5 --task-description data/task-$task-spot-check/config.json --output results/task-$task-spot-check/ -done + sisap-baseline python search.py \ + --input "data/$name/"*.h5 \ + --task-description "data/$name/config.json" \ + --output "results/$name/" +} + +if [ $# -eq 0 ]; then + for task in 1 2 3; do + run_for_name "task-$task-spot-check" + done +else + run_for_name "$1" +fi