-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathpyproject.toml
More file actions
111 lines (98 loc) · 2.88 KB
/
pyproject.toml
File metadata and controls
111 lines (98 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Package metadata read by Poetry when building and publishing the project.
[tool.poetry]
name = "autocorpus"
version = "v1.2"
description = "A tool to standardise text and table data extracted from full text publications."
license = "GPL-3.0-only"
readme = "README.md"
repository = "https://github.com/omicsNLP/Auto-CORPus"
documentation = "https://omicsnlp.github.io/Auto-CORPus/"
authors = [
    "Tim Beck <tim.beck@nottingham.ac.uk>",
    "Joram Matthias Posma <j.posma11@imperial.ac.uk>",
    "Antoine Lain <a.lain@imperial.ac.uk>",
    "Thomas Rowlands <thomas.rowlands@nottingham.ac.uk>",
]
keywords = [
    "natural language processing",
    "text mining",
    "biomedical literature",
    "semantics",
    "health data",
]
classifiers = [
    "Intended Audience :: Science/Research",
]

# Extra project links, in addition to `repository` and `documentation` above.
[tool.poetry.urls]
Publication = "https://doi.org/10.3389/fdgth.2022.788124"

# Console entry points: `auto-corpus` invokes `main` in `autocorpus.__main__`.
[tool.poetry.scripts]
auto-corpus = "autocorpus.__main__:main"
# Runtime requirements. The interpreter constraint comes first; the remaining
# packages are listed alphabetically.
[tool.poetry.dependencies]
python = ">=3.10,<4"
beautifulsoup4 = "^4.12.3"
dataclasses-json = "^0.6.7"
fuzzywuzzy = { version = "^0.18.0", extras = ["speedup"] }
lxml = ">=5.3,<7.0"
# Optional: only pulled in through the "pdf" extra declared below.
marker-pdf = { version = "^1.6.2", optional = true }
nltk = "^3.9.1"
openpyxl = "^3.1.5"
pandas = "^2.2.3"
python-docx = "^1.1.2"
regex = "^2024.9.11"

# Installable extras; each one lists optional dependencies from the table above.
[tool.poetry.extras]
pdf = ["marker-pdf"]
# Development-only tooling (tests, linting, type checking and type stubs),
# listed alphabetically.
[tool.poetry.group.dev.dependencies]
jsonschema = "^4.23.0"
lxml-stubs = "^0.5.1"
mypy = "^1.15.0"
pandas-stubs = "^2.2.3.250308"
pre-commit = "^4.2.0"
pytest = "^8.3.2"
pytest-cov = ">=5,<8"
pytest-mock = "^3.14.0"
pytest-xdist = { version = "^3.6.1", extras = ["psutil"] }
ruff = ">=0.7.1,<0.15.0"
types-beautifulsoup4 = "^4.12.0.20250204"
types-jsonschema = "^4.23.0.20241208"
types-pywin32 = ">=310.0.0.20250516,<312.0.0.0"
types-regex = ">=2024.11.6.20250318,<2026.0.0.0"
types-tqdm = "^4.67.0.20250319"
# Packages needed to build the MkDocs documentation site, listed alphabetically.
[tool.poetry.group.docs.dependencies]
mkdocs = "^1.6.0"
mkdocs-gen-files = "^0.5.0"
mkdocs-literate-nav = "^0.6.1"
mkdocs-material = "^9.5.42"
mkdocs-section-index = "^0.3.9"
mkdocstrings = ">=0.26.2,<0.31.0"
mkdocstrings-python = "^1.12.2"
# PEP 517 build configuration: the package is built with Poetry's standalone
# backend. The lower bound follows the form recommended in Poetry's
# documentation so that a pre-1.0 poetry-core is never selected.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
# Static type checking with mypy.
[tool.mypy]
exclude = [".venv/", "docs/", "site/"]
disallow_any_generics = true
warn_unreachable = true
warn_unused_ignores = true
# Not enabled yet; uncomment to require annotations on every function.
# disallow_untyped_defs = true

# Test modules are exempt from the untyped-definition check.
[[tool.mypy.overrides]]
module = "tests.*"
disallow_untyped_defs = false

# Imports from these packages are not reported when mypy cannot find type
# information for them.
[[tool.mypy.overrides]]
module = [
    "nltk.*",
    "fuzzywuzzy.*",
    "bioc.*",
    "marker.*",
]
ignore_missing_imports = true
# Default pytest command-line options. The multi-line string uses line-ending
# backslashes, so it parses to a single space-separated line of arguments.
[tool.pytest.ini_options]
addopts = """\
-v -p no:warnings -n auto \
--cov=autocorpus --cov-branch \
--cov-report=xml --cov-report=html \
--doctest-modules \
--ignore=docs/ --ignore=site/\
"""
# Ruff linter configuration.
[tool.ruff]
# Lint against Python 3.10 syntax and features.
target-version = "py310"

[tool.ruff.lint]
# Rule families to enable, by prefix.
select = [
    "D",   # pydocstyle
    "E",   # pycodestyle errors
    "F",   # Pyflakes
    "I",   # isort
    "UP",  # pyupgrade
    "RUF", # Ruff-specific rules
    "W",   # pycodestyle warnings
]
# Individual rules switched off within the selected families.
ignore = [
    "E501",   # line too long
    "RUF001", # ambiguous unicode character in string (e.g. multiplication/minus/en dash signs)
]
# Check docstrings against the Google style.
pydocstyle.convention = "google"