# pyproject.toml — packaging metadata and build configuration for contextifier.
# Build backend declaration: the project is built with Hatchling, which is
# the only build-time requirement.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Core project metadata: identity, supported Python versions, licensing,
# discovery metadata, and the always-installed runtime dependencies.
[project]
name = "contextifier"
version = "0.2.6"
description = "Convert raw documents into AI-understandable context with intelligent text extraction, table detection, and semantic chunking"
readme = "README.md"
requires-python = ">=3.12"
license = { text = "Apache-2.0" }
authors = [
    { name = "CocoRoF", email = "gkfua00@gmail.com" },
]
maintainers = [
    { name = "CocoRoF", email = "gkfua00@gmail.com" },
]
keywords = [
    "document-processing",
    "pdf",
    "ocr",
    "text-extraction",
    "chunking",
    "ai",
    "llm",
    "langchain",
    "hwp",
    "docx",
    "xlsx",
]
# Keep the Python version classifiers in step with `requires-python` above.
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Text Processing",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime dependencies, grouped by the document family they support.
dependencies = [
    # Core utilities
    "beautifulsoup4>=4.12.0",
    "chardet>=5.0.0",
    "langchain-text-splitters>=1.0.0",
    # PDF handling
    "pdfplumber>=0.11.0",
    "pdfminer.six>=20231228",
    "pdf2image>=1.17.0",
    # Office formats: DOCX / PPTX / Excel
    "python-docx>=1.1.0",
    "docx2pdf>=0.1.8",
    "python-pptx>=1.0.0",
    "openpyxl>=3.1.0",
    "xlrd>=2.0.0",
    # HWP documents
    "pyhwp>=0.1b15",
    "olefile>=0.47",
    # RTF documents
    "striprtf>=0.0.29",
    # Images and OCR
    "pi-heif>=1.0.0",
    "pytesseract>=0.3.10",
]

# Optional extras; none of these are installed unless requested by name.
[project.optional-dependencies]
# PyMuPDF is licensed under AGPL-3.0, so it is opt-in: requesting this extra
# is an explicit acknowledgement of those license terms.
pdf = [
    "pymupdf>=1.24.0",
]
# Ecosystem integrations; not part of the core functionality.
langchain = [
    "langchain>=1.0.0",
    "langchain-aws>=1.0.0",
    "langchain-community>=0.4.0",
    "langchain-core>=1.0.0",
    "langchain-openai>=1.0.0",
    "langchain-anthropic>=1.0.0",
    "langchain-google-genai>=4.0.0",
    "langgraph>=1.0.0",
    "langsmith>=0.6.0",
]
server = [
    "pydantic>=2.12.0",
    "pydantic-settings>=2.12.0",
    "python-dotenv>=1.0.0",
    "python-multipart>=0.0.20",
    "orjson>=3.10.0",
    "psutil>=7.0.0",
]
# Every extra above plus pandas and cachetools. Because this pulls in the
# `pdf` extra, it also installs the AGPL-3.0 licensed PyMuPDF.
all = [
    "contextifier[pdf,langchain,server]",
    "pandas>=2.2.0",
    "cachetools>=5.3.0",
]

# Project links published with the package metadata; all of them point at
# the GitHub repository or pages within it.
[project.urls]
Homepage = "https://github.com/CocoRoF/Contextifier"
Documentation = "https://github.com/CocoRoF/Contextifier#readme"
Repository = "https://github.com/CocoRoF/Contextifier.git"
Issues = "https://github.com/CocoRoF/Contextifier/issues"
Changelog = "https://github.com/CocoRoF/Contextifier/releases"
# Wheel target: package only the `contextifier` directory.
[tool.hatch.build.targets.wheel]
packages = ["contextifier"]
# Source distribution target: ship the package sources together with the
# README, the license text, and this configuration file.
[tool.hatch.build.targets.sdist]
include = [
    "contextifier/",
    "README.md",
    "LICENSE",
    "pyproject.toml",
]
# Keep VCS data, virtual environments, bytecode, and the test/temp/libs
# directories out of the archive.
# NOTE(review): the leading "/" on "/__pycache__" presumably anchors the
# pattern to the project root only — confirm nested cache directories are
# covered (e.g. through "*.pyc").
exclude = [
    "/.git",
    "/.venv",
    "/__pycache__",
    "*.pyc",
    "test/",
    "temp/",
    "libs/",
]