-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
116 lines (94 loc) · 2.99 KB
/
Dockerfile
File metadata and controls
116 lines (94 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# OCR Processor Enterprise Dockerfile
# Multi-stage build for optimal image size and security
# Stage 1: Builder stage
# Installs the compiler toolchain plus the Tesseract/Leptonica development
# headers, then pip-installs requirements.txt into root's user site
# (/root/.local), which is the path the runtime stage copies from.
FROM python:3.11-slim AS builder
# Install system dependencies for building.
# --no-install-recommends keeps apt "Recommends" packages out of the stage, and
# the package lists are deleted in the same layer so they are never persisted.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        libleptonica-dev \
        libtesseract-dev \
        poppler-utils \
        qpdf \
        tesseract-ocr \
        tesseract-ocr-heb \
    && rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Copy only the dependency manifest so the pip layer below stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
# Install Python dependencies into /root/.local (--user) without keeping pip's
# download cache in the layer.
RUN pip install --no-cache-dir --user -r requirements.txt
# Stage 2: Runtime stage
FROM python:3.11-slim AS runtime
# Create a system group and a system user so the container does not run as root.
RUN groupadd -r ocruser && useradd -r -g ocruser ocruser
# Install runtime system dependencies: the OCR/PDF command-line tools plus curl,
# which the generated health-check script uses to probe the API.
# --no-install-recommends avoids pulling optional packages into the final image;
# apt caches and package lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        poppler-utils \
        qpdf \
        tesseract-ocr \
        tesseract-ocr-heb \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Create the application directory tree and hand it to the non-root user.
RUN mkdir -p /app /app/data /app/output /app/logs /app/temp /app/quarantine && \
    chown -R ocruser:ocruser /app
# Copy the Python packages installed with `pip --user` in the builder stage into
# the runtime user's user-site location.
COPY --from=builder /root/.local /home/ocruser/.local
# Python/runtime environment:
#   PATH                     - exposes console scripts installed under ~/.local/bin
#   PYTHONPATH               - makes /app the import root (the app is started as `src.…`)
#   PYTHONUNBUFFERED         - unbuffered stdout/stderr so logs appear immediately
#   PYTHONDONTWRITEBYTECODE  - do not write .pyc files at runtime
ENV PATH=/home/ocruser/.local/bin:$PATH \
    PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1
# Set working directory
WORKDIR /app
# Copy application code. --chown assigns ownership at copy time, so a later
# recursive chown of /app does not have to rewrite these files into a new layer.
COPY --chown=ocruser:ocruser src/ ./src/
COPY --chown=ocruser:ocruser requirements.txt .
COPY --chown=ocruser:ocruser docker/ ./docker/
# Create startup script (/app/start.sh), used as the image ENTRYPOINT.
#   start.sh api      -> starts the uvicorn API server on 0.0.0.0:8000
#   start.sh <cmd...> -> prints the usage text, then execs <cmd...> as given
# The script is written with printf '%s\n' (one argument per script line) so the
# result does not depend on whether the build shell's `echo` expands "\n".
# uvicorn is started with `exec` so it replaces the bash wrapper and receives
# signals (e.g. SIGTERM from `docker stop`) directly.
# NOTE(review): after shell quote removal uvicorn receives the target
# `src.api_server:get_api_server(config).app`. Uvicorn resolves the part after
# ':' by attribute lookup and does not evaluate call expressions, so this target
# likely fails to load - confirm against src/api_server and point it at a
# module-level app object (or an app factory with --factory).
RUN printf '%s\n' \
        '#!/bin/bash' \
        'if [ "$1" = "api" ]; then' \
        '    echo "Starting OCR API Server..."' \
        '    exec python -m uvicorn src.api_server:get_api_server"(config)".app --host 0.0.0.0 --port 8000' \
        'else' \
        '    echo "Usage: docker run ocr-processor api"' \
        '    echo " api - Start REST API server"' \
        '    exec "$@"' \
        'fi' \
        > /app/start.sh \
    && chmod +x /app/start.sh
# Create health-check script (/app/healthcheck.sh).
#   healthcheck.sh api -> probes http://localhost:8000/health with curl
#   healthcheck.sh     -> verifies that the tesseract and ocrmypdf commands run
# Exits 1 on any failed check; otherwise prints "healthy".
# Written with printf '%s\n' (one argument per script line) so the result does
# not depend on whether the build shell's `echo` expands "\n". Quoting each line
# also keeps the in-script comment, which would be stripped as a Dockerfile
# comment if it began a continued line on its own.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'if [ "$1" = "api" ]; then' \
        '    curl -f http://localhost:8000/health || exit 1' \
        'else' \
        '    # Check if OCR tools are available' \
        '    tesseract --version >/dev/null 2>&1 || exit 1' \
        '    ocrmypdf --version >/dev/null 2>&1 || exit 1' \
        'fi' \
        'echo "healthy"' \
        > /app/healthcheck.sh \
    && chmod +x /app/healthcheck.sh
# Document the port the API server listens on (metadata only; it is not published).
EXPOSE 8000
# Give everything under /app, including the scripts generated above, to the
# unprivileged user.
RUN chown -R ocruser:ocruser /app
# Drop root: the container process runs as ocruser from here on.
USER ocruser
# Container health probe: runs the generated script in "api" mode every 30s,
# allowing 10s per attempt, a 60s start-up grace period and 3 retries.
HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
    CMD /app/healthcheck.sh api || exit 1
# Entry point: arguments given to `docker run` are passed to the startup script.
ENTRYPOINT ["/app/start.sh"]
# Image metadata labels
LABEL maintainer="OCR Processor Team" \
      version="2.0.0" \
      description="Enterprise OCR Processing Suite with API, GUI, and CLI interfaces" \
      org.opencontainers.image.source="https://github.com/ocr-processor/enterprise"
# Default runtime configuration for the application (OCR_* variables); each can
# be overridden at `docker run` time with -e.
ENV OCR_LOG_LEVEL=INFO \
    OCR_OUTPUT_BASE_DIR=/app/output \
    OCR_LOG_DIRECTORY=/app/logs \
    OCR_MAX_CONCURRENT_JOBS=2 \
    OCR_MAX_FILE_SIZE=104857600 \
    OCR_ENABLE_API=true \
    OCR_API_HOST=0.0.0.0 \
    OCR_API_PORT=8000