# ---------------------------------------------------------------------------
# .coderabbit.yaml
# ---------------------------------------------------------------------------
# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
language: "ko-KR"
early_access: false
reviews:
  profile: "chill"
  request_changes_workflow: false
  high_level_summary: true
  poem: true
  review_status: true
  collapse_walkthrough: false
  auto_review:
    enabled: true
    drafts: false
    base_branches:
      - main
      - test
chat:
  auto_reply: true
---
# ---------------------------------------------------------------------------
# .github/workflows/romi-auto-file-upload.yml
#
# On every push to main, writes the .env secret to the server (a "latest"
# copy plus a timestamped backup copy) together with a JSON metadata file
# describing the run.
# ---------------------------------------------------------------------------
name: romi-auto-file-upload.yml

on:
  push:
    branches:
      - main

jobs:
  upload-files:
    runs-on: ubuntu-latest
    steps:
      - name: 코드 체크아웃
        uses: actions/checkout@v4

      # Build the timestamp used as the backup folder name.
      - name: 타임스탬프 폴더명 생성
        run: |
          # Use Asia/Seoul local time.
          export TZ='Asia/Seoul'
          # Format: YYYY-MM-DD_HH-MM-SS
          TIMESTAMP=$(date '+%Y-%m-%d_%H-%M-%S')
          echo "TIMESTAMP=$TIMESTAMP" >> "$GITHUB_ENV"
          echo "BUILD_DATE=$(date '+%Y-%m-%d %H:%M')" >> "$GITHUB_ENV"
          echo "생성된 타임스탬프: $TIMESTAMP"

      # Derive the 7-character short commit hash.
      - name: 짧은 커밋 해시 계산
        run: |
          echo "SHORT_COMMIT_HASH=$(echo ${{ github.sha }} | cut -c1-7)" >> "$GITHUB_ENV"
          echo "짧은 커밋 해시: $(echo ${{ github.sha }} | cut -c1-7)"

      - name: 서버에 파일 업로드
        uses: appleboy/ssh-action@v1.0.3
        env:
          # The sudo password is handed to the remote script as an environment
          # variable (forwarded through `envs` below) instead of being spliced
          # unquoted into the script text, where spaces or shell
          # metacharacters in the secret would break or alter the command.
          PW: ${{ secrets.SERVER_PASSWORD }}
        with:
          host: ${{ secrets.SERVER_HOST }}
          username: ${{ secrets.SERVER_USER }}
          password: ${{ secrets.SERVER_PASSWORD }}
          port: 2022
          envs: TIMESTAMP,SHORT_COMMIT_HASH,BUILD_DATE,PW
          script: |
            set -e

            echo "환경변수 설정.."

            # Directory holding the latest copy of the files.
            echo "메인 디렉토리 생성 중..."
            printf '%s\n' "$PW" | sudo -S -p '' mkdir -p /volume1/projects/romi/github_secret

            # Timestamped backup directory.
            echo "타임스탬프 백업 디렉토리 생성 중... ($TIMESTAMP)"
            printf '%s\n' "$PW" | sudo -S -p '' mkdir -p "/volume1/projects/romi/github_secret/$TIMESTAMP"

            # Upload the .env file (latest + backup).
            # NOTE(review): the `sudo tee` / `sudo chmod` calls below read the
            # heredoc on stdin, so they rely on the sudo credentials cached by
            # the `sudo -S` calls above — confirm the server's sudoers
            # timestamp_timeout is not 0.
            # NOTE(review): the .env content comes from a secret named
            # APPLICATION_PROD_YML — confirm this is the intended secret.
            echo ".env 파일 업로드 중..."
            cat << 'EOF' | sudo tee /volume1/projects/romi/github_secret/.env > /dev/null
            ${{ secrets.APPLICATION_PROD_YML }}
            EOF
            cat << 'EOF' | sudo tee "/volume1/projects/romi/github_secret/$TIMESTAMP/.env" > /dev/null
            ${{ secrets.APPLICATION_PROD_YML }}
            EOF
            # Secret files should not be readable by other users on the host.
            sudo chmod 600 /volume1/projects/romi/github_secret/.env
            sudo chmod 600 "/volume1/projects/romi/github_secret/$TIMESTAMP/.env"
            echo ".env 파일 업로드 완료"

            # Generate and upload the metadata JSON file.
            echo "메타데이터 JSON 파일 생성 중..."
            cat << EOF | sudo tee "/volume1/projects/romi/github_secret/$TIMESTAMP/cicd-gitignore-file.json" > /dev/null
            {
              "build_info": {
                "timestamp": "$TIMESTAMP",
                "workflow": "설정 파일 관리",
                "run_id": "${{ github.run_id }}",
                "run_number": "${{ github.run_number }}",
                "job": "upload-files",
                "event": "${{ github.event_name }}",
                "repository": "${{ github.repository }}",
                "owner": "${{ github.repository_owner }}",
                "branch": "${{ github.ref_name }}",
                "commit_hash": "${{ github.sha }}",
                "short_hash": "$SHORT_COMMIT_HASH",
                "commit_url": "https://github.com/${{ github.repository }}/commit/${{ github.sha }}",
                "actor": "${{ github.actor }}",
                "build_date": "$BUILD_DATE",
                "runner_os": "${{ runner.os }}"
              },
              "files": [
                {
                  "file_name": ".env",
                  "file_path": "/",
                  "last_updated": "$BUILD_DATE"
                }
              ]
            }
            EOF
            echo "메타데이터 JSON 파일 업로드 완료"

            echo "모든 파일 업로드 완료"
# ---------------------------------------------------------------------------
# app/config/database.py
# ---------------------------------------------------------------------------
from typing import AsyncGenerator

from sqlalchemy.ext.asyncio import (
    AsyncEngine,
    AsyncSession,
    async_sessionmaker,
    create_async_engine,
)

from app.config.settings import get_settings
from app.utils.logger import logger

_settings = get_settings()

# One engine per process, created at import time from the configured URL.
_engine = create_async_engine(
    _settings.postgres_url,
    pool_pre_ping=True,
    echo=False,  # switch to True to log emitted SQL
)

# Sessions produced here keep attribute values after commit
# (expire_on_commit=False) and do not flush automatically (autoflush=False).
_async_session_factory = async_sessionmaker(
    bind=_engine,
    expire_on_commit=False,
    autoflush=False,
)


def get_async_engine() -> AsyncEngine:
    """Return the module-level async engine."""
    return _engine


def get_async_session_factory() -> async_sessionmaker[AsyncSession]:
    """Return the module-level async session factory."""
    return _async_session_factory


async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield one session and roll it back if the consumer raises.

    The session is opened with ``async with``, which closes it on exit, so no
    separate ``close()`` call is needed. This generator never commits;
    committing is left to the code that receives the session.

    Yields:
        A new ``AsyncSession`` from the module-level factory.

    Raises:
        Exception: any exception raised while the session is in use is logged,
            the session is rolled back, and the exception is re-raised.
    """
    async with _async_session_factory() as session:
        try:
            yield session
        except Exception as e:
            # logger.exception logs at ERROR level and attaches the traceback.
            logger.exception("Postgres DB 에러: %s", e)
            await session.rollback()
            raise


# ---------------------------------------------------------------------------
# app/config/settings.py
# ---------------------------------------------------------------------------
from functools import lru_cache

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application settings loaded from environment variables / ``.env``.

    Fields without a default are required; only ``github_api_token`` is
    optional.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # GitHub API
    github_api_base_url: str
    github_api_token: str | None = None

    # Ollama
    ollama_base_url: str
    ollama_api_key: str
    ollama_model: str
    ollama_timeout_seconds: int

    # Qdrant
    qdrant_base_url: str
    qdrant_collection: str
    qdrant_api_key: str

    # Text chunking
    text_chunk_max_chars: int
    text_chunk_overlap_chars: int
    text_chunk_hard_max_chars: int

    # Concurrency
    concurrency_embedding_max_concurrency: int

    # PostgreSQL
    postgres_url: str


@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Build the ``Settings`` object once and return the cached instance."""
    return Settings()
# ---------------------------------------------------------------------------
# app/db/init_db.py
# ---------------------------------------------------------------------------
from sqlalchemy.ext.asyncio import AsyncEngine

from app.models.base import Base
from app.utils.logger import logger


def _import_all_models() -> None:
    """Import every entity module so its table is registered on ``Base.metadata``."""
    # Imported only for the side effect of declaring the mapped class.
    from app.models.github_cursor import GithubCursorEntity  # noqa: F401


async def create_tables_if_not_exists(engine: AsyncEngine) -> None:
    """Run ``Base.metadata.create_all`` on ``engine`` inside one transaction.

    Args:
        engine: async engine on which the DDL is executed.
    """
    _import_all_models()

    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)

    logger.info("DB 테이블 생성완료")


# ---------------------------------------------------------------------------
# app/main.py
# ---------------------------------------------------------------------------
from contextlib import asynccontextmanager
from typing import AsyncIterator, Dict

from fastapi import FastAPI

from app.config.database import get_async_engine
from app.db.init_db import create_tables_if_not_exists
from app.utils.logger import logger


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Create the tables on startup and dispose the engine on shutdown.

    Args:
        app: the FastAPI application this lifespan is attached to.
    """
    logger.info("ChatBot 애플리케이션 시작")
    engine = get_async_engine()
    await create_tables_if_not_exists(engine)
    try:
        yield
    finally:
        # Release pooled connections explicitly instead of leaving them to
        # interpreter teardown.
        await engine.dispose()
        logger.info("애플리케이션 종료")


app = FastAPI(lifespan=lifespan)

# (The routes that follow in app/main.py, starting with /health, are outside
# this view and are not reproduced here.)


# ---------------------------------------------------------------------------
# app/models/base.py
# ---------------------------------------------------------------------------
import uuid
from datetime import datetime

from sqlalchemy import DateTime, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    """SQLAlchemy declarative base that every entity inherits from."""


class TimestampMixin:
    """Adds ``created_at`` / ``updated_at`` columns.

    Both columns get ``now()`` as a server default on INSERT. No ``onupdate``
    hook is configured, so ``updated_at`` must be set explicitly by whatever
    code performs an UPDATE.
    """

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),  # filled in by the database on INSERT
    )

    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),  # filled in by the database on INSERT
    )


class PrimaryKeyMixin:
    """Adds a UUID primary key generated client-side with ``uuid.uuid4``."""

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )


# ---------------------------------------------------------------------------
# app/models/enums/source_type.py
# ---------------------------------------------------------------------------
from enum import Enum


class SourceType(str, Enum):
    """Kinds of GitHub content that are embedding targets.

    - REPOSITORY: repository files / documents (README, etc.)
    - ISSUE: issues
    - PULL_REQUEST: pull requests
    - COMMIT: commits
    - RELEASE: releases
    """

    REPOSITORY = "REPOSITORY"
    ISSUE = "ISSUE"
    PULL_REQUEST = "PULL_REQUEST"
    COMMIT = "COMMIT"
    RELEASE = "RELEASE"


# ---------------------------------------------------------------------------
# app/models/github_cursor.py
# ---------------------------------------------------------------------------
from sqlalchemy import Enum as SqlEnum
from sqlalchemy import Index, String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column

from app.models.base import Base, PrimaryKeyMixin, TimestampMixin
from app.models.enums.source_type import SourceType


class GithubCursorEntity(Base, PrimaryKeyMixin, TimestampMixin):
    """Stored cursor value for one (repository_name, source_type) pair."""

    __tablename__ = "github_cursor"
    __table_args__ = (
        UniqueConstraint("repository_name", "source_type", name="uq_github_cursor"),
        # NOTE(review): this index covers the same columns as the unique
        # constraint above, which PostgreSQL already backs with an index;
        # it looks redundant — confirm before dropping it.
        Index("idx_github_cursor_repo_type", "repository_name", "source_type"),
    )

    repository_name: Mapped[str] = mapped_column(String(200), nullable=False)

    # native_enum=False stores the enum as a string column rather than a
    # database-level ENUM type.
    source_type: Mapped[SourceType] = mapped_column(
        SqlEnum(SourceType, native_enum=False),
        nullable=False,
    )

    cursor_value: Mapped[str] = mapped_column(String(500), nullable=False)
# ---------------------------------------------------------------------------
# app/repositories/github_cursor_repository.py
# ---------------------------------------------------------------------------
import uuid
from typing import Optional

from sqlalchemy import func, select
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.enums.source_type import SourceType
from app.models.github_cursor import GithubCursorEntity


class GithubCursorRepository:
    """Read/write access to ``github_cursor`` rows.

    Every method takes the session as an argument and never commits; the
    caller owns the transaction.
    """

    async def find_by_repository_name_and_source_type(
        self,
        session: AsyncSession,
        repository_name: str,
        source_type: SourceType,
    ) -> Optional[GithubCursorEntity]:
        """Look up the cursor for one repository + source type.

        Args:
            session: session used to run the query.
            repository_name: value matched against ``repository_name``.
            source_type: value matched against ``source_type``.

        Returns:
            The matching entity, or ``None`` when no row matches.
        """
        stmt = select(GithubCursorEntity).where(
            GithubCursorEntity.repository_name == repository_name,
            GithubCursorEntity.source_type == source_type,
        )
        rows = await session.execute(stmt)
        return rows.scalar_one_or_none()

    async def upsert(
        self,
        session: AsyncSession,
        repository_name: str,
        source_type: SourceType,
        cursor_value: str,
    ) -> GithubCursorEntity:
        """Insert the cursor, or update it when the pair already exists.

        On a conflict on ``(repository_name, source_type)`` the existing row's
        ``cursor_value`` is replaced and ``updated_at`` is set to ``now()``.

        Args:
            session: session used to run the statement.
            repository_name: repository the cursor belongs to.
            source_type: source type the cursor belongs to.
            cursor_value: new cursor value to store.

        Returns:
            The inserted or updated entity as returned by ``RETURNING``.
        """
        stmt = (
            insert(GithubCursorEntity)
            .values(
                id=uuid.uuid4(),
                repository_name=repository_name,
                source_type=source_type,
                cursor_value=cursor_value,
            )
            .on_conflict_do_update(
                index_elements=["repository_name", "source_type"],
                set_={
                    "cursor_value": cursor_value,
                    "updated_at": func.now(),
                },
            )
            .returning(GithubCursorEntity)
        )

        # populate_existing makes the RETURNING row overwrite an entity that
        # is already in this session's identity map (e.g. loaded earlier by
        # find_by_repository_name_and_source_type). Without it the caller
        # would get the object with its previously loaded cursor_value.
        rows = await session.execute(
            stmt,
            execution_options={"populate_existing": True},
        )
        return rows.scalar_one()
def setup_logger(
    name: str = "chatbot",
    log_level: str = "INFO",
    log_file: Optional[str] = None,
    format_string: Optional[str] = None,
) -> logging.Logger:
    """Configure and return a logger.

    If the named logger already has handlers it is returned unchanged, so
    repeated calls do not stack duplicate handlers (and do not re-apply
    ``log_level``).

    Args:
        name: logger name (default ``"chatbot"``).
        log_level: level name (DEBUG, INFO, WARNING, ERROR, CRITICAL),
            case-insensitive. Anything that does not resolve to a numeric
            logging level falls back to INFO.
        log_file: path of a log file; ``None`` disables file logging. Missing
            parent directories are created.
        format_string: custom format string; ``None`` selects the default
            format.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger(name)

    # Already configured: hand back the existing logger as-is.
    if logger.handlers:
        return logger

    # Resolve the level name. getattr alone is not enough: names such as
    # "basic_format" resolve to non-level attributes of the logging module
    # (a string here), which setLevel would reject with ValueError.
    level = getattr(logging, log_level.upper(), None)
    if not isinstance(level, int):
        level = logging.INFO
    logger.setLevel(level)

    if format_string is None:
        format_string = (
            "%(asctime)s - %(name)s - %(levelname)s - "
            "%(filename)s:%(lineno)d - %(message)s"
        )

    formatter = logging.Formatter(format_string, datefmt="%Y-%m-%d %H:%M:%S")

    # Console handler (stdout).
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # Optional file handler.
    if log_file:
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)

        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger


# Default logger instance shared by the application.
logger = setup_logger()
-- ---------------------------------------------------------------------------
-- sql/V20251222_141233__create_github_cursor_table.sql
-- One cursor row per (repository_name, source_type) pair.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS github_cursor (
    id              UUID         PRIMARY KEY,
    repository_name VARCHAR(200) NOT NULL,
    source_type     VARCHAR(50)  NOT NULL,
    cursor_value    VARCHAR(500) NOT NULL,
    created_at      TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    updated_at      TIMESTAMPTZ  NOT NULL DEFAULT NOW(),

    CONSTRAINT uq_github_cursor
        UNIQUE (repository_name, source_type),

    CONSTRAINT ck_github_cursor_source_type
        CHECK (source_type IN ('REPOSITORY', 'ISSUE', 'PULL_REQUEST', 'COMMIT', 'RELEASE'))
);

-- Create the lookup index.
-- NOTE(review): PostgreSQL already backs uq_github_cursor with a unique index
-- on the same columns, so this index looks redundant — confirm before
-- removing it.
CREATE INDEX IF NOT EXISTS idx_github_cursor_repo_type
    ON github_cursor (repository_name, source_type);