-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
executable file
·210 lines (179 loc) · 7.08 KB
/
CMakeLists.txt
File metadata and controls
executable file
·210 lines (179 loc) · 7.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
cmake_minimum_required(VERSION 3.22)
project(RagPUREAI VERSION 1.0)

# General build settings
# C++23 is the project-wide default and is mandatory: configuration fails
# instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Default to a Release build, but only when the user has not picked a build
# type (for example with -DCMAKE_BUILD_TYPE=Debug). Assigning the variable
# unconditionally would shadow the user's choice.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release")
endif()

# Emit compile_commands.json for editors and static-analysis tools.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Toolchain
# Pick the Conan-generated toolchain file and the Release flags per compiler.
# NOTE(review): CMAKE_TOOLCHAIN_FILE is assigned after project(); confirm
# that this assignment still takes effect, or that the toolchain file is
# also passed on the command line.
# NOTE(review): both branches disable optimisation for Release builds
# (-O0 and /Od); confirm this is intentional.
if(NOT CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
  set(CMAKE_TOOLCHAIN_FILE "${CMAKE_BINARY_DIR}/Release/generators/conan_toolchain.cmake")
  set(CMAKE_CXX_FLAGS_RELEASE "-O0")
  # Position-independent code for every C++ translation unit.
  string(APPEND CMAKE_CXX_FLAGS " -fPIC")
else()
  set(CMAKE_TOOLCHAIN_FILE "${CMAKE_BINARY_DIR}/generators/conan_toolchain.cmake")
  set(CMAKE_CXX_FLAGS_RELEASE "/Od")
endif()

# User-facing switches; both are off by default.
option(BUILD_APPS "Build apps" OFF)
option(CURL_STATIC_LINKING "Set to ON to build libcurl with static linking." OFF)
# Python & Pybind11
# Python 3 (interpreter plus development files) is located first, then
# pybind11. Both are mandatory.
find_package(
  Python3 REQUIRED
  COMPONENTS
    Interpreter
    Development
)

# Directory-scoped include path for the Python headers.
include_directories(
  ${Python3_INCLUDE_DIRS}
)

find_package(pybind11 REQUIRED)
# External Dependencies
# Every package below is mandatory and is looked up in the listed order;
# configuration stops at the first one that cannot be found.
foreach(_rag_dependency IN ITEMS
    pdfium
    ICU
    miniz
    rapidxml
    beauty
    lexbor
    OpenMP
    re2
    nlohmann_json
    CURL
    onnxruntime
    redis++
)
  find_package(${_rag_dependency} REQUIRED)
endforeach()
# Ask the Python interpreter for its resolved 'purelib' (site-packages)
# directory and store it in PYTHON_SITE_PACKAGES. The value is used further
# down to build the RPATH lists.
# COMMAND_ERROR_IS_FATAL makes configuration stop if the interpreter cannot
# be run or exits with an error, instead of continuing with an empty path.
execute_process(
  COMMAND "${Python3_EXECUTABLE}" -c
          "import sysconfig; import pathlib; site_packages = sysconfig.get_paths()['purelib']; print(str(pathlib.Path(site_packages).resolve()))"
  OUTPUT_VARIABLE PYTHON_SITE_PACKAGES
  OUTPUT_STRIP_TRAILING_WHITESPACE
  COMMAND_ERROR_IS_FATAL ANY
)
# Protobuf
# Mandatory dependency; its headers are added at directory scope.
find_package(Protobuf REQUIRED)
include_directories(
  ${Protobuf_INCLUDE_DIRS}
)
# Torch
# Use the CPU libtorch tree located under libs/libtorch/cpu in the source
# directory: its CMake package, its headers and its library directory.
set(_rag_libtorch_root "${CMAKE_SOURCE_DIR}/libs/libtorch/cpu")

set(Torch_DIR "${_rag_libtorch_root}/share/cmake/Torch")
find_package(Torch REQUIRED)

# Directory-scoped header and library search paths.
include_directories("${_rag_libtorch_root}/include")
link_directories("${_rag_libtorch_root}/lib")

# The helper variable is only needed within this section.
unset(_rag_libtorch_root)
# Tokenizers
# tokenizers-cpp is configured from its source tree under libs/. Its build
# directory is "tokenizers", and EXCLUDE_FROM_ALL is passed so its targets
# are not part of the default "all" target.
set(TOKENIZERS_PATH "${CMAKE_SOURCE_DIR}/libs/tokenizers-cpp")
add_subdirectory(
  "${TOKENIZERS_PATH}"
  tokenizers
  EXCLUDE_FROM_ALL
)

# OpenAI
# Only the location of openai-cpp is recorded here.
# The variable is spelled OPEANAI_CPP_PATH; the include directories of
# RagPUREAILib reference it under exactly that spelling, so it must not be
# renamed in isolation.
set(OPEANAI_CPP_PATH "${CMAKE_SOURCE_DIR}/libs/openai-cpp")
# RPATH
# The build tree and the install tree use the same runtime search list, so
# the entries are written once and assigned to both variables.
# "\$ORIGIN" is escaped so that the literal text $ORIGIN ends up in the
# RPATH instead of being treated as a CMake variable reference.
# NOTE(review): the first entry contains a literal "*"; confirm that the
# dynamic loader does anything useful with it.
# NOTE(review): "\$ORIGIN:/usr/bin/protoc" embeds a ':' and names what looks
# like an executable rather than a directory; confirm it is intended.
set(_rag_runtime_search_dirs
  "${PYTHON_SITE_PACKAGES}/*/d_libs/libtorch/cpu/lib"
  "\$ORIGIN/purecpp.libs"
  "\$ORIGIN/d_libs/libtorch/cpu/lib"
  "${CMAKE_SOURCE_DIR}/libs/libtorch/cpu/lib"
  "\$ORIGIN/libs/libtorch/cpu/lib"
  "\$ORIGIN:/usr/bin/protoc"
  "/usr/lib/x86_64-linux-gnu"
  "/usr/lib64"
  "\$ORIGIN/"
  "\$ORIGIN/purecpp/d_libs/libtorch/cpu/lib"
)
set(CMAKE_BUILD_RPATH "${_rag_runtime_search_dirs}")
set(CMAKE_INSTALL_RPATH "${_rag_runtime_search_dirs}")
unset(_rag_runtime_search_dirs)

# NOTE(review): CMAKE_EXE_LINKER_FLAGS only affects executables; confirm
# whether the Python modules built by this file need the flag as well.
string(APPEND CMAKE_EXE_LINKER_FLAGS " -Wl,--enable-new-dtags")

# Also record the directories of linked libraries in the install RPATH.
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
# Sources
# VectorDatabase implementation files are collected with a recursive glob.
# CONFIGURE_DEPENDS makes the build system re-check the glob on every build,
# so a .cpp file added to or removed from that directory is picked up
# without having to re-run CMake by hand.
file(GLOB_RECURSE VDB_SRCS CONFIGURE_DEPENDS
  "${CMAKE_SOURCE_DIR}/components/VectorDatabase/src/*.cpp"
)

# Translation units that define the Python bindings.
set(RagPUREAI_BINDING_SRCS
  ${CMAKE_SOURCE_DIR}/src/binding.cpp
  ${CMAKE_SOURCE_DIR}/components/VectorDatabase/python/binding_vectordb.cpp
)

# Implementation sources: the globbed VectorDatabase files followed by an
# explicit list of the remaining components.
set(RagPUREAI_IMPL_SRCS
  ${VDB_SRCS}
  # Shared utilities
  ${CMAKE_SOURCE_DIR}/libs/StringUtils/StringUtils.cpp
  ${CMAKE_SOURCE_DIR}/libs/CommonStructs/CommonStructs.cpp
  # Data loaders
  ${CMAKE_SOURCE_DIR}/components/DataLoader/BaseLoader.cpp
  ${CMAKE_SOURCE_DIR}/components/DataLoader/PDFLoader/PDFLoader.cpp
  ${CMAKE_SOURCE_DIR}/components/DataLoader/DOCXLoader/DOCXLoader.cpp
  ${CMAKE_SOURCE_DIR}/components/DataLoader/WebLoader/WebLoader.cpp
  ${CMAKE_SOURCE_DIR}/components/DataLoader/TXTLoader/TXTLoader.cpp
  # Metadata extraction
  ${CMAKE_SOURCE_DIR}/components/MetadataExtractor/MetadataExtractor.cpp
  ${CMAKE_SOURCE_DIR}/components/MetadataExtractor/MetadataRegexExtractor/MetadataRegexExtractor.cpp
  ${CMAKE_SOURCE_DIR}/components/MetadataExtractor/MetadataHFExtractor/MetadataHFExtractor.cpp
  # Embeddings
  ${CMAKE_SOURCE_DIR}/components/Embedding/EmbeddingOpenAI/EmbeddingOpenAI.cpp
  ${CMAKE_SOURCE_DIR}/components/Embedding/EmbeddingModel/EmbeddingModel.cpp
  # Chunking
  ${CMAKE_SOURCE_DIR}/components/Chunk/ChunkCommons/ChunkCommons.cpp
  ${CMAKE_SOURCE_DIR}/components/Chunk/ChunkCount/ChunkCount.cpp
  ${CMAKE_SOURCE_DIR}/components/Chunk/ChunkDefault/ChunkDefault.cpp
  ${CMAKE_SOURCE_DIR}/components/Chunk/ChunkSimilarity/ChunkSimilarity.cpp
  ${CMAKE_SOURCE_DIR}/components/Chunk/ChunkQuery/ChunkQuery.cpp
  # Data cleaning
  ${CMAKE_SOURCE_DIR}/components/CleanData/ContentCleaner/ContentCleaner.cpp
  # Chat
  ${CMAKE_SOURCE_DIR}/components/Chat/Message/HumanMessage.cpp
  ${CMAKE_SOURCE_DIR}/components/Chat/Message/AIMessage.cpp
  ${CMAKE_SOURCE_DIR}/components/Chat/Message/SystemMessage.cpp
  ${CMAKE_SOURCE_DIR}/components/Chat/ChatHistory/ChatHistory.cpp
)
# RagPUREAILib
# Static library holding the implementation sources. Its include
# directories are PUBLIC, so targets that link against it inherit them.
add_library(RagPUREAILib STATIC)
target_sources(RagPUREAILib PRIVATE ${RagPUREAI_IMPL_SRCS})

# Component headers.
target_include_directories(RagPUREAILib PUBLIC
  "${CMAKE_SOURCE_DIR}/components"
  "${CMAKE_SOURCE_DIR}/components/DataLoader"
  "${CMAKE_SOURCE_DIR}/components/MetadataExtractor"
  "${CMAKE_SOURCE_DIR}/components/Chunk"
  "${CMAKE_SOURCE_DIR}/components/CleanData"
  "${CMAKE_SOURCE_DIR}/components/Embedding"
  "${CMAKE_SOURCE_DIR}/components/Embedding/EmbeddingOpenAI"
  "${CMAKE_SOURCE_DIR}/components/Embedding/EmbeddingModel"
  "${CMAKE_SOURCE_DIR}/components/Chat"
  "${CMAKE_SOURCE_DIR}/components/Chat/ChatHistory"
  "${CMAKE_SOURCE_DIR}/components/Chat/Message"
  "${CMAKE_SOURCE_DIR}/components/VectorDatabase/include"
)

# Utility headers under libs/.
target_include_directories(RagPUREAILib PUBLIC
  "${CMAKE_SOURCE_DIR}/libs/RagException"
  "${CMAKE_SOURCE_DIR}/libs/ThreadSafeQueue"
  "${CMAKE_SOURCE_DIR}/libs/CommonStructs"
  "${CMAKE_SOURCE_DIR}/libs/StringUtils"
  "${CMAKE_SOURCE_DIR}/libs/FileUtils"
  "${CMAKE_SOURCE_DIR}/libs/MemoryUtils"
)

# Headers of bundled and external libraries. The list-valued variables are
# left unquoted so that an empty value contributes no entry.
target_include_directories(RagPUREAILib PUBLIC
  "${TOKENIZERS_PATH}/include"
  "${OPEANAI_CPP_PATH}/include"
  "${CMAKE_SOURCE_DIR}/libs/libtorch/cpu/include"
  ${CURL_INCLUDE_DIRS}
  ${TORCH_INCLUDE_DIRS}
)
# Link dependencies of RagPUREAILib. They are PUBLIC, so anything linking
# against RagPUREAILib links against these as well. The order of the
# original list is preserved across the two calls.
# NOTE(review): hiredis::hiredis is used here although this file has no
# find_package(hiredis); presumably another package brings it in - confirm.
target_link_libraries(RagPUREAILib
  PUBLIC
    pdfium::pdfium
    icu::icu
    miniz::miniz
    rapidxml::rapidxml
    beauty::beauty
    lexbor::lexbor_static
    OpenMP::OpenMP_CXX
    re2::re2
    nlohmann_json::nlohmann_json
    redis++::redis++_static
    hiredis::hiredis
    onnxruntime::onnxruntime
    tokenizers_cpp
    protobuf::libprotobuf
    CURL::libcurl
)

# Libraries that are only available through result variables.
target_link_libraries(RagPUREAILib
  PUBLIC
    ${Python3_LIBRARIES}
    ${TORCH_LIBRARIES}
)
# Binding with Pybind11
# Python extension module built from the binding sources. RagPUREAILib is
# linked PRIVATE, i.e. it is a build requirement of the module only.
pybind11_add_module(
  RagPUREAI
  ${RagPUREAI_BINDING_SRCS}
)
target_link_libraries(
  RagPUREAI
  PRIVATE
    RagPUREAILib
)
# vectordb
# Second Python extension module, built from a single binding source.
pybind11_add_module(vectordb components/VectorDatabase/python/_vectordb.cpp)

# VectorDatabase is wrapped in --whole-archive / --no-whole-archive linker
# flags.
# NOTE(review): no target named VectorDatabase is defined in the visible
# part of this file; confirm where the library comes from.
target_link_libraries(vectordb
  PRIVATE
    -Wl,--whole-archive
    VectorDatabase
    -Wl,--no-whole-archive
)

# LTO/IPO is switched off for this module to avoid ODR/refcount problems:
# explicitly on the compile and link command lines here, and through the
# INTERPROCEDURAL_OPTIMIZATION property below.
target_link_options(vectordb PRIVATE -fno-lto)
target_compile_options(vectordb PRIVATE -fno-lto)

# The built module is named "vectordb" and every artifact kind is written to
# <build dir>/python.
set_target_properties(vectordb PROPERTIES
  INTERPROCEDURAL_OPTIMIZATION FALSE
  OUTPUT_NAME "vectordb"
  LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/python"
  ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/python"
  RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/python"
)