From 0aa937b91a7d9be8f72110b8dd10507b4c045c82 Mon Sep 17 00:00:00 2001 From: Sangjoon PARK Date: Wed, 25 Mar 2026 18:42:37 +0900 Subject: [PATCH 1/8] =?UTF-8?q?develop=20=EB=B8=8C=EB=9E=9C=EC=B9=98?= =?UTF-8?q?=EC=99=80=20=ED=86=B5=ED=95=A9=20=EB=B0=8F=20=EC=A0=95=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 최근 develop 브랜치 내용과 통합 - docsinfos 테이블 연동 서버 독립 (포트 8002) --- EasyDOC-frontend/easydoc-parser/main.py | 44 ++++++++- .../easydoc-parser/requirements.txt | 5 + EasyDOC-frontend/package-lock.json | 9 ++ EasyDOC-frontend/src/pages/Viewer.jsx | 87 +++++++++++++----- backend-OCR/ocr_server.py | 57 ++++++++++-- backend-OCR/requirements.txt | Bin 186 -> 256 bytes database_document/__init__.py | 0 .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 162 bytes .../__pycache__/__init__.cpython-314.pyc | Bin 0 -> 168 bytes .../__pycache__/database.cpython-310.pyc | Bin 0 -> 1973 bytes .../__pycache__/database.cpython-314.pyc | Bin 0 -> 2917 bytes .../__pycache__/document_api.cpython-314.pyc | Bin 0 -> 4807 bytes database_document/database.py | 56 +++++++++++ database_document/document_api.py | 78 ++++++++++++++++ database_document/requirements.txt | 8 ++ database_document/schema.sql | 13 +++ 16 files changed, 318 insertions(+), 39 deletions(-) create mode 100644 database_document/__init__.py create mode 100644 database_document/__pycache__/__init__.cpython-310.pyc create mode 100644 database_document/__pycache__/__init__.cpython-314.pyc create mode 100644 database_document/__pycache__/database.cpython-310.pyc create mode 100644 database_document/__pycache__/database.cpython-314.pyc create mode 100644 database_document/__pycache__/document_api.cpython-314.pyc create mode 100644 database_document/database.py create mode 100644 database_document/document_api.py create mode 100644 database_document/requirements.txt create mode 100644 database_document/schema.sql diff --git a/EasyDOC-frontend/easydoc-parser/main.py b/EasyDOC-frontend/easydoc-parser/main.py index b2f0805..922a539 100644 --- a/EasyDOC-frontend/easydoc-parser/main.py +++ b/EasyDOC-frontend/easydoc-parser/main.py @@ -1,4 +1,6 @@ -from fastapi import FastAPI, UploadFile, File +from fastapi import FastAPI, UploadFile, File, Depends +from sqlalchemy.orm import Session +import sys from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel from typing import List, Optional @@ -31,6 +33,16 @@ if _env_files: print(f"✓ .env 로드 완료: {[str(f) for f in _env_files]}") +# --- DB 공유를 위한 경로 설정 --- +current_file_path = Path(__file__).resolve() +root_dir = current_file_path.parent.parent.parent + +if str(root_dir) not in sys.path: + sys.path.append(str(root_dir)) + +from database_document.database import get_db, Document +# ----------------------------- + app = FastAPI() app.add_middleware( @@ -191,8 +203,8 @@ async def parse_hwp(file: UploadFile = File(...)): @app.get("/parse/s3/{file_key:path}") -async def parse_from_s3(file_key: str): - """S3에서 파일 가져와서 파싱""" +async def parse_from_s3(file_key: str, db: Session = Depends(get_db)): + """S3에서 파일 가져와서 파싱 및 DB 저장""" print(f"[DEBUG] 파싱 요청 받음 - 파일 키: {file_key}") print(f"[DEBUG] 버킷: {BUCKET_NAME}") try: @@ -212,7 +224,6 @@ async def parse_from_s3(file_key: str): page_text = page.extract_text() if page_text: text += page_text + "\n" - return {"filename": filename, "text": text} elif ext == "hwp": ole = olefile.OleFileIO(io.BytesIO(contents)) @@ -222,10 +233,33 @@ async def parse_from_s3(file_key: str): else: text = "텍스트를 추출할 수 없습니다." ole.close() - return {"filename": filename, "text": text.strip()} + text = text.strip() else: return {"error": "지원하지 않는 파일 형식입니다."} + + # ================= [새로 추가] DB 저장 로직 ================= + # S3 URL 조립 + s3_url = f"https://{BUCKET_NAME}.s3.{os.getenv('AWS_DEFAULT_REGION')}.amazonaws.com/{file_key}" + + # DB 모델 생성 + new_doc = Document( + file_name=filename, + file_type=ext, + s3_url=s3_url, + extracted_text=text + ) + + # DB에 추가 및 커밋 + db.add(new_doc) + db.commit() + db.refresh(new_doc) + + print(f"[DEBUG] DB 저장 성공! (문서 번호: {new_doc.id})") + # ======================================================== + + # 저장된 ID와 함께 프론트엔드로 응답 + return {"id": new_doc.id, "filename": filename, "text": text} except Exception as e: return {"error": str(e)} diff --git a/EasyDOC-frontend/easydoc-parser/requirements.txt b/EasyDOC-frontend/easydoc-parser/requirements.txt index 3435f10..7186df7 100644 --- a/EasyDOC-frontend/easydoc-parser/requirements.txt +++ b/EasyDOC-frontend/easydoc-parser/requirements.txt @@ -6,3 +6,8 @@ python-dotenv==1.0.0 boto3==1.34.34 google-generativeai==0.8.3 pandas==2.2.0 +sqlalchemy +pymysql +cryptography +google-cloud-aiplatform +python-multipart \ No newline at end of file diff --git a/EasyDOC-frontend/package-lock.json b/EasyDOC-frontend/package-lock.json index 08d9e63..abc4204 100644 --- a/EasyDOC-frontend/package-lock.json +++ b/EasyDOC-frontend/package-lock.json @@ -58,6 +58,7 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -1704,6 +1705,7 @@ "integrity": "sha512-MWtvHrGZLFttgeEj28VXHxpmwYbor/ATPYbBfSFZEIRK0ecCFLl2Qo55z52Hss+UV9CRN7trSeq1zbgx7YDWWg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -1752,6 +1754,7 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -1884,6 +1887,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -2253,6 +2257,7 @@ "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -3198,6 +3203,7 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -3275,6 +3281,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz", "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -3565,6 +3572,7 @@ "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", @@ -3686,6 +3694,7 @@ "integrity": "sha512-k7Nwx6vuWx1IJ9Bjuf4Zt1PEllcwe7cls3VNzm4CQ1/hgtFUK2bRNG3rvnpPUhFjmqJKAKtjV576KnUkHocg/g==", "dev": true, "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/EasyDOC-frontend/src/pages/Viewer.jsx b/EasyDOC-frontend/src/pages/Viewer.jsx index add2068..03268bc 100644 --- a/EasyDOC-frontend/src/pages/Viewer.jsx +++ b/EasyDOC-frontend/src/pages/Viewer.jsx @@ -74,12 +74,8 @@ export default function Viewer({ parsedData, ocrData, pdfFileUrl }) { const [isPdf, setIsPdf] = useState(false); // 최근 문서 목록 상태 - const [recentDocs, setRecentDocs] = useState([ - {id: 1, title: '행정기본법.pdf', date: '2024.11.14'}, - {id: 2, title: '조세특례제한법.pdf', date: '2024.11.13'}, - {id: 3, title: '도시및주거환경지정비법.pdf', date: '2024.11.13'}, - {id: 4, title: '건축법시행령.pdf', date: '2024.11.12'}, - ]); + const [recentDocs, setRecentDocs] = useState([]); + const [selectedDoc, setSelectedDoc] = useState(null); // 현재 선택된 문서 상세 정보 // 파일 선택을 위한 ref const fileInputRef = useRef(null); @@ -126,8 +122,60 @@ export default function Viewer({ parsedData, ocrData, pdfFileUrl }) { setPendingSelectedText(null); }; + // docsinfos DB에서 최근 문서 목록 가져오기 + useEffect(() => { + fetchDocuments(); + }, []); + + const fetchDocuments = async () => { + try { + const response = await axios.get('http://localhost:8002/api/documents'); + setRecentDocs(response.data); + } catch (error) { + console.error("문서 목록 로딩 실패:", error); + } + }; + + // 사이드바에서 문서 클릭 시 상세 내용 가져오기 + const handleDocClick = async (id) => { + try { + setIsLoading(true); + const response = await axios.get(`http://localhost:8002/api/documents/${id}`); + + const docData = response.data; + + setSelectedDoc(docData); // 선택된 문서 상태 업데이트 + //setPdfUrl(null); // PDF 뷰어에서 텍스트 모드로 전환 + setDocumentName(docData.file_name); // AgentChat용 문서 이름 업데이트 + + // 가져온 텍스트를 뷰어 상태에 반영 + setParsedText(docData.text || ""); + setOcrText(""); + + if (docData.s3_url) { + setPdfUrl(docData.s3_url); + const isPdfFile = docData.file_type?.toLowerCase() === 'pdf' || + docData.file_name?.toLowerCase().endsWith('.pdf'); + setIsPdf(isPdfFile); + } + } catch (error) { + console.error("문서 상세 로딩 실패:", error); + alert("문서 내용을 불러올 수 없습니다."); + } finally { + setIsLoading(false); + } + }; + + // 날짜 형식 변환 함수 (2024-11-14 -> 2024.11.14) + const formatDate = (dateString) => { + if (!dateString) return ""; + return dateString.substring(0, 10).replace(/-/g, '.'); + }; + // props로 받은 데이터를 상태에 반영 (Upload에서 넘어올 때) useEffect(() => { + if (selectedDoc) return; + if (parsedData) { console.log("Viewer가 받은 parsedData:", parsedData); setParsedText(parsedData.text || ""); @@ -143,7 +191,7 @@ export default function Viewer({ parsedData, ocrData, pdfFileUrl }) { setPdfUrl(pdfFileUrl); setIsPdf(true); } - }, [parsedData, ocrData, pdfFileUrl]); + }, [parsedData, ocrData, pdfFileUrl, selectedDoc]); // 버튼 클릭 시 숨겨진 input 실행 const handleUploadBtnClick = () => { @@ -231,20 +279,7 @@ const handleFileChange = async (e) => { } // 최근 문서 목록 업데이트 - const newDoc = { - id: Date.now(), - title: file.name, - date: new Date().toLocaleDateString("ko-KR", { - year: "numeric", - month:"2-digit", - day: "2-digit", - }) - .replace(/\. /g, ".") - .replace(".", "") - }; - - const filteredDocs = recentDocs.filter((doc) => doc.title != file.name); - setRecentDocs([newDoc, ...filteredDocs].slice(0, 10)); + await fetchDocuments(); } catch (error) { console.error("파일 처리 오류:", error); @@ -290,14 +325,18 @@ const handleFileChange = async (e) => {