# Spaces: Sleeping (status text captured with this Dockerfile; not a build instruction)
# Image for a Python 3.10 application started with `python app.py` and served
# on port 7860 (Gradio). Installs Tesseract OCR, Poppler and FFmpeg from the
# distribution packages and runs the application as an unprivileged user.
FROM python:3.10-slim

# System-level dependencies, one per line in alphabetical order so diffs stay small.
# --no-install-recommends keeps optional packages out of the image; the two
# recommended packages worth keeping are therefore listed explicitly:
#   ca-certificates - HTTPS trust store for curl, git and pip
#   poppler-data    - encoding data for Poppler (NOTE(review): presumably only
#                     needed for CJK PDFs; drop it if the app never handles them)
# The apt package index is deleted in the same layer so it never reaches the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Make Tesseract reachable under /usr/local/bin as well, and fail the build
# early if the binary cannot run. -f keeps the step repeatable when the link
# already exists.
RUN ln -sf /usr/bin/tesseract /usr/local/bin/tesseract \
    && tesseract --version

# Runtime environment.
# TESSERACT_PATH is the path of the executable itself, not a directory, so it
# is deliberately NOT prepended to PATH: a file is not a valid PATH entry, and
# the verification step above already resolves `tesseract` by name.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    TESSERACT_PATH=/usr/bin/tesseract

# Unprivileged account that owns and runs the application. UID 1000 follows the
# Hugging Face Spaces convention for Docker images. The application directory is
# created with that owner up front because WORKDIR alone would create it as root.
RUN useradd --create-home --uid 1000 user \
    && install -d -o user -g user /home/user/app

WORKDIR /home/user/app

# Python dependencies are installed system-wide (as root) from the manifest
# only, so this layer is reused until requirements.txt itself changes.
COPY requirements.txt ./
RUN python -m pip install --no-cache-dir --upgrade pip \
    && python -m pip install --no-cache-dir -r requirements.txt

# Everything below runs as the unprivileged user. HOME is set explicitly so
# per-user data directories resolve to /home/user whatever UID the runtime uses.
USER user
ENV HOME=/home/user

# Download the NLTK "punkt" tokenizer as the runtime user so the data lands in
# that user's home, and before the source copy so a code change does not
# trigger a new download. Requires nltk to be installed by requirements.txt.
RUN python -m nltk.downloader punkt

# Application code, owned by the runtime user so the app can write next to it.
COPY --chown=user:user . .

# Port the application listens on (documentation only; publish it at run time).
EXPOSE 7860

# Probe the HTTP endpoint. The start period gives the application time to come
# up before failed probes count against it.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS http://localhost:7860 || exit 1

# Exec form: python is PID 1 and receives SIGTERM from `docker stop` directly.
CMD ["python", "app.py"]