This repository was archived by the owner on Jun 11, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathMakefile
More file actions
93 lines (76 loc) · 2.92 KB
/
Makefile
File metadata and controls
93 lines (76 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Define environment, bash script execution line.
# python-default is the first rule visible in this file, so it is presumably
# the default goal. It first runs the `script` rule (generating the run
# scripts) and then installs/upgrades the Python packages with pip.
# Declared phony so a stray file named `python-default` can never make the
# target look up to date and skip the recipe.
.PHONY: python-default
python-default: script
	#
	# Python dependencies (pip)
	# First two commands to help with installation on RIT CS systems
	pip install --user numpy --upgrade
	pip install --user packaging --upgrade
	pip install --user python-terrier bs4 tqdm pandas lxml --upgrade
	pip install --upgrade git+https://github.com/terrierteam/pyterrier_colbert
	pip install --upgrade git+https://github.com/Georgetown-IR-Lab/OpenNIR.git
# Generate the executable helper scripts in the current directory. Each one is
# a "#!<path to bash>" line followed by the matching bin/*-TEMPLATE file:
#   arqmath-index, run-topics-test, run-topics-2020, run-topics-2021
# exec_line is a scratch file holding the shebang line; it is removed at the
# end of the recipe.
# The '#' recipe lines carry no '@' prefix, so make prints them as
# progress/usage messages (the shell itself treats them as comments).
# Phony: `script` names an action, not a file produced by the recipe.
.PHONY: script
script:
	# Creating test scripts...
	@echo "#!`which bash`" > exec_line
	@cat exec_line bin/arqmath-index-TEMPLATE > arqmath-index
	@chmod u+x arqmath-index
	@cat exec_line bin/run-topics-test-TEMPLATE > run-topics-test
	@cat exec_line bin/run-topics-2020-TEMPLATE > run-topics-2020
	@cat exec_line bin/run-topics-2021-TEMPLATE > run-topics-2021
	@chmod u+x run-topics-test run-topics-2020 run-topics-2021
	@rm exec_line
	#
	# Quick run/eval test script is ./run-topics-test
	# Evaluation run script for ARQMath-1 topics w. BM25 is ./run-topics-2020
	# Evaluation run script for ARQMath-2 topics w. BM25 is ./run-topics-2021
	#
	# Indexing test script is ./arqmath-index
# Aggregate target: runs all four download rules (collection, post-data,
# raw-post-data, math-data). It has no recipe of its own.
# Phony so that a file or directory named `data` cannot make it a no-op.
.PHONY: data
data: collection post-data raw-post-data math-data
# Download ARQMath_Collection.zip, unpack it into the current directory, and
# delete the archive afterwards.
# Phony: the target name is a command, not a file this recipe creates.
.PHONY: collection
collection:
	wget https://www.cs.rit.edu/~dprl/data/ARQMath/ARQMath_Collection.zip
	unzip ARQMath_Collection.zip
	rm ARQMath_Collection.zip
# Download the prebuilt post index archive, unpack it into the current
# directory, and delete the archive afterwards.
# Tokenized with Terrier defaults (stopwords + porter stemmer), English tokenizer
# Phony: the target name is a command, not a file this recipe creates.
.PHONY: post-data
post-data:
	wget https://www.cs.rit.edu/~dprl/data/ARQMath/ARQMath_Collection-post-ptindex.zip
	unzip ARQMath_Collection-post-ptindex.zip
	rm ARQMath_Collection-post-ptindex.zip
# Download the raw-token post index archive, unpack it into the current
# directory, and delete the archive afterwards.
# No stopwords or stemming, using English tokenizer
# Phony: the target name is a command, not a file this recipe creates.
.PHONY: raw-post-data
raw-post-data:
	wget https://www.cs.rit.edu/~dprl/data/ARQMath/ARQMath_Collection-post-ptindex-rawtokens.zip
	unzip ARQMath_Collection-post-ptindex-rawtokens.zip
	rm ARQMath_Collection-post-ptindex-rawtokens.zip
# Download the prebuilt math index archive, unpack it into the current
# directory, and delete the archive afterwards.
# Phony: the target name is a command, not a file this recipe creates.
.PHONY: math-data
math-data:
	wget https://www.cs.rit.edu/~dprl/data/ARQMath/ARQMath_Collection-math-ptindex.zip
	unzip ARQMath_Collection-math-ptindex.zip
	rm ARQMath_Collection-math-ptindex.zip
# Run the generated ./arqmath-index script on test/indexTest.xml with -s
# (presumably "index post text" -- confirm against bin/arqmath-index-TEMPLATE).
# Requires ./arqmath-index to have been generated by the `script` rule.
# Phony so that a file or directory named `posts` cannot make it a no-op.
.PHONY: posts
posts:
	./arqmath-index test/indexTest.xml -s
# Run the generated ./arqmath-index script on test/indexTest.xml with -m
# (presumably "index math" -- confirm against bin/arqmath-index-TEMPLATE).
# Requires ./arqmath-index to have been generated by the `script` rule.
# Phony so that a file or directory named `math` cannot make it a no-op.
.PHONY: math
math:
	./arqmath-index test/indexTest.xml -m
# Run the quick run/eval test script ./run-topics-test (generated by the
# `script` rule).
# Phony so that a file or directory named `eval` cannot make it a no-op.
.PHONY: eval
eval:
	./run-topics-test
# Remove all compressed result files (*.res.gz) from the current directory.
# -f keeps the target from failing when no result files exist; `rm` has no
# -g option, so that flag would abort the recipe with "invalid option".
# Phony: this is a cleanup command, not a file to build.
.PHONY: delete-results
delete-results:
	rm -f *.res.gz
# Recursively remove everything in the current directory whose name ends in
# -ptindex. NOTE(review): the glob also matches the ARQMath_Collection-*-ptindex
# paths used by the experiment rules, so this presumably deletes the downloaded
# indices as well -- confirm that is intended.
# Phony: this is a cleanup command, not a file to build.
.PHONY: delete-indices
delete-indices:
	rm -rf *-ptindex
# Run both generated evaluation scripts in sequence: ./run-topics-2020 and
# then ./run-topics-2021 (both created by the `script` rule). Make stops after
# the first one if it exits non-zero.
# Phony so that a file or directory named `baseline` cannot make it a no-op.
.PHONY: baseline
baseline:
	./run-topics-2020
	./run-topics-2021
# Run src/run_topics_experiment.py with the shortened 2020 topics file from
# ./test. Arguments, in order (meanings inferred from the path names -- confirm
# against the script): math index, post index, topics XML, qrels TSV.
# Phony: the target name is a command, not a file this recipe creates.
.PHONY: experiment-short
experiment-short:
	python3 src/run_topics_experiment.py \
		./ARQMath_Collection-math-ptindex \
		./ARQMath_Collection-post-ptindex \
		./test/2020_topics_task1_short.xml \
		./ARQMath_Evaluation/qrels_task_1/2020_qrels_task1.tsv
# Run src/run_topics_experiment.py with the 2020 task 1 topics and qrels files.
# Arguments, in order (meanings inferred from the path names -- confirm
# against the script): math index, post index, topics XML, qrels TSV.
# Phony: the target name is a command, not a file this recipe creates.
.PHONY: experiment-2020
experiment-2020:
	python3 src/run_topics_experiment.py \
		./ARQMath_Collection-math-ptindex \
		./ARQMath_Collection-post-ptindex \
		./ARQMath_Evaluation/topics_task_1/2020_topics_task1.xml \
		./ARQMath_Evaluation/qrels_task_1/2020_qrels_task1.tsv
# Run src/run_topics_experiment.py with the 2021 task 1 topics and qrels files.
# Arguments, in order (meanings inferred from the path names -- confirm
# against the script): math index, post index, topics XML, qrels TSV.
# Phony: the target name is a command, not a file this recipe creates.
.PHONY: experiment-2021
experiment-2021:
	python3 src/run_topics_experiment.py \
		./ARQMath_Collection-math-ptindex \
		./ARQMath_Collection-post-ptindex \
		./ARQMath_Evaluation/topics_task_1/2021_topics_task1.xml \
		./ARQMath_Evaluation/qrels_task_1/2021_qrels_task1.tsv