Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 57 additions & 6 deletions EasyDOC-frontend/easydoc-parser/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from fastapi import FastAPI, UploadFile, File
from fastapi import FastAPI, UploadFile, File, Depends
from sqlalchemy.orm import Session
import sys
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Optional
Expand Down Expand Up @@ -44,6 +46,16 @@
else:
print(f"⚠ GOOGLE_APPLICATION_CREDENTIALS 파일 없음: {_cred_path}")

# --- DB 공유를 위한 경로 설정 ---
current_file_path = Path(__file__).resolve()
root_dir = current_file_path.parent.parent.parent

if str(root_dir) not in sys.path:
sys.path.append(str(root_dir))

from database_document.database import get_db, Document
# -----------------------------

app = FastAPI()

app.add_middleware(
Expand Down Expand Up @@ -210,8 +222,8 @@ async def parse_hwp(file: UploadFile = File(...)):


@app.get("/parse/s3/{file_key:path}")
async def parse_from_s3(file_key: str):
"""S3에서 파일 가져와서 파싱"""
async def parse_from_s3(file_key: str, user_email: str = "", db: Session = Depends(get_db)):
"""S3에서 파일 가져와서 파싱 및 DB 저장"""
print(f"[DEBUG] 파싱 요청 받음 - 파일 키: {file_key}")
print(f"[DEBUG] 버킷: {BUCKET_NAME}")
try:
Expand All @@ -231,7 +243,6 @@ async def parse_from_s3(file_key: str):
page_text = page.extract_text()
if page_text:
text += page_text + "\n"
return {"filename": filename, "text": text}

elif ext == "hwp":
ole = olefile.OleFileIO(io.BytesIO(contents))
Expand All @@ -241,10 +252,49 @@ async def parse_from_s3(file_key: str):
else:
text = "텍스트를 추출할 수 없습니다."
ole.close()
return {"filename": filename, "text": text.strip()}
text = text.strip()

else:
return {"error": "지원하지 않는 파일 형식입니다."}

# ================= DB 저장 로직 =================
# 파일 크기 계산
size_kb = len(contents) / 1024
file_size_str = f"{size_kb:.1f} KB"

# 페이지 수 계산
total_pages = 1
if ext == "pdf":
try:
with pdfplumber.open(io.BytesIO(contents)) as pdf:
total_pages = len(pdf.pages)
except:
total_pages = 1

# S3 URL 조립
s3_url = f"https://{BUCKET_NAME}.s3.{os.getenv('AWS_DEFAULT_REGION')}.amazonaws.com/{file_key}"

# DB 모델 생성
new_doc = Document(
file_name=filename,
file_type=ext,
s3_url=s3_url,
extracted_text=text,
file_size=file_size_str,
page_count=total_pages,
user_email=user_email
)

# DB에 추가 및 커밋
db.add(new_doc)
db.commit()
db.refresh(new_doc)

print(f"[DEBUG] DB 저장 성공! (문서 번호: {new_doc.id}, 크기: {file_size_str}, 페이지: {total_pages})")
# ========================================================

# 저장된 ID와 함께 프론트엔드로 응답
return {"id": new_doc.id, "filename": filename, "text": text}

except Exception as e:
return {"error": str(e)}
Expand Down Expand Up @@ -335,7 +385,8 @@ async def analyze_with_gemini(data: dict):
try:
response = gemini_model.generate_content(prompt)
response_text = response.text.strip()



# 응답 파싱
for line in response_text.split("\n"):
line = line.strip()
Expand Down
5 changes: 5 additions & 0 deletions EasyDOC-frontend/easydoc-parser/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,8 @@ python-dotenv==1.0.0
boto3==1.34.34
google-generativeai==0.8.3
pandas==2.2.0
sqlalchemy
pymysql
cryptography
google-cloud-aiplatform
python-multipart
9 changes: 9 additions & 0 deletions EasyDOC-frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions EasyDOC-frontend/src/pages/Login.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export default function Login() {
return (
<Upload
onNavigateToMyPage={() => setCurrentView("mypage")}
userEmail={userEmail}
/>
);
case "mypage":
Expand Down
Loading