diff --git a/demo.py b/demo.py index 5074b51..520be09 100644 --- a/demo.py +++ b/demo.py @@ -8,7 +8,14 @@ def create_phrases_file(phrases_dir, phrases_file, dictionary_dir=ETHICS_DICTIONARY_DIR): - global terms_xml_path + """ + Creates file for phrases based on the dictionary dir and dictionary name + + creates directories if they do not exist + :param phrases_dir: directory holding the phrases file + :param phrases_file: file holding phrases file + + """ terms_xml_dir = Path(dictionary_dir, phrases_dir) if not terms_xml_dir.exists(): terms_xml_dir.mkdir() @@ -68,7 +75,7 @@ def create_and_write_list_for_fields(dict_for_entities, field, out_filename): run_analysis( get_or_create_corpus_dir(TERPENES), create_phrases_file("terpenes_key_phrases", "terpenes_key_phrases.xml", dictionary_dir="terpenes_dictionary"), - query=TERPENES, + query="terpenes", hits = 20, ) diff --git a/docanalysis/extract_entities.py b/docanalysis/extract_entities.py index 3c3717f..2d630c3 100644 --- a/docanalysis/extract_entities.py +++ b/docanalysis/extract_entities.py @@ -58,7 +58,8 @@ def extract_entities_from_papers(self, corpus_path, terms_xml_path, query=None, self.create_project_files(query, hits, corpus_path) if install_ami: logging.info(f"installing ami3 (check whether this is a good idea)") - self.install_ami() + logging.info(f"please check independently that ami is installed") + # self.install_ami() logging.info(f"dict with parsed xml in {corpus_path}") dict_with_parsed_xml = self.make_dict_with_parsed_xml(corpus_path) @@ -88,10 +89,15 @@ def create_project_files(self, QUERY, HITS, OUTPUT): os.system(f'pygetpapers -q "{QUERY}" -k {HITS} -o {OUTPUT} -x') os.system(f"ami -p {OUTPUT} section") - def install_ami(self): - os.system("git clone https://github.com/petermr/ami3.git") - os.system("cd ami3") - os.system("mvn install -Dmaven.test.skip=true") + """ + removed as too complex. + TODO maybe have a check that `ami` is installed, but install elsewhere + """ + # def install_ami(self): + # os.system("git clone https://github.com/petermr/ami3.git") + # os.system("cd ami3") + # os.system("mvn install -Dmaven.test.skip=true") + def make_dict_with_parsed_xml(self, output): diff --git a/pmr_demo.py b/pmr_demo.py deleted file mode 100644 index f5327e8..0000000 --- a/pmr_demo.py +++ /dev/null @@ -1,24 +0,0 @@ -import os -from docanalysis import DocAnalysis -from pathlib import Path - -ethic_statement_creator = DocAnalysis() -term_dir = Path(os.getcwd(), "terpenes_dictionary", "terpenes_key_phrases", ) -if not term_dir.exists(): - term_dir.mkdir() -dict_for_entities = ethic_statement_creator.extract_entities_from_papers( - corpus_path=Path(os.getcwd(), "corpus", "terpenes", ), - terms_xml_path=Path(term_dir, "terpenes_key_phrases.xml"), - query="terpenes", - hits=10, - make_project=True -) -print(f"dict {dict_for_entities}") -list_with_orgs = ethic_statement_creator.extract_particular_fields( - dict_for_entities, 'ORG') -with open('org.text', 'w') as f: - f.write(str(list_with_orgs)) -list_with_gpe = ethic_statement_creator.extract_particular_fields( - dict_for_entities, 'GPE') -with open('GPE.text', 'w') as f: - f.write(str(list_with_gpe)) diff --git a/terpenes_dictionary/terpenes_key_phrases/terpenes_key_phrases.xml b/terpenes_dictionary/terpenes_key_phrases/terpenes_key_phrases.xml new file mode 100644 index 0000000..6bafc77 --- /dev/null +++ b/terpenes_dictionary/terpenes_key_phrases/terpenes_key_phrases.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file