Skip to content

Commit

Permalink
Merge pull request #64 from classtranscribe/staging
Browse files Browse the repository at this point in the history
Push to production
  • Loading branch information
angrave authored Oct 31, 2023
2 parents 70433c4 + 8db1ca4 commit 4f5b89c
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 44 deletions.
119 changes: 119 additions & 0 deletions .github/workflows/platform.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
name: Platform

# This workflow runs when requirements-platform.txt, Dockerfile-platform or this
# workflow file itself is changed, and on release activity.
#
# To be able to push to Docker Hub, this expects the following
# secrets to be set in the project:
# - DOCKERHUB_USERNAME : username that can push to the org
# - DOCKERHUB_PASSWORD : password associated with the username
on:
push:
paths:
- 'requirements-platform.txt'
- 'Dockerfile-platform'
- '.github/workflows/platform.yml'

# Pushes are only considered on these branches.
branches:
- main
- staging
- expt

pull_request:
paths:
- 'requirements-platform.txt'
- 'Dockerfile-platform'
- '.github/workflows/platform.yml'
# Trigger the workflow on release activity (applies to the `release` key below)

release:
# Only use the types keyword to narrow down the activity types that will trigger your workflow.
# NOTE(review): a single new release may fire more than one of these types and so
# run this workflow more than once - confirm that is intended.
types:
- published
- edited
- created

# Certain actions will only run when this is the main repo.
# MAIN_REPO is compared against github.repository before publishing;
# DOCKERHUB_ORG is the organisation prefix of the published image name.
env:
MAIN_REPO: classtranscribe/pyapi
DOCKERHUB_ORG: classtranscribe

jobs:
  # Single job that builds the platform image once per matrix entry.
  docker:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: true
      matrix:
        name:
          - ct-python-platform
        # Per-image settings, attached to the matrix entry with the same `name`.
        # FOLDER is used as the docker build context, IMAGE as the image name.
        include:
          - name: ct-python-platform
            FOLDER: .
            IMAGE: ct-python-platform

    steps:
      # checkout@v2 targets the retired Node 12 action runtime; v4 is the
      # maintained release and is a drop-in replacement for this default usage.
      - uses: actions/checkout@v4

# calculate some variables that are used later
- name: github branch
  run: |
    # Exports three variables to later steps through $GITHUB_ENV:
    #   GITHUB_BRANCH - branch the build is for
    #   VERSION       - package version on main/master, otherwise the branch name
    #   TAGS          - comma-separated list of docker tags to apply

    # A release event carries its target branch in the payload; for every other
    # event use the last path segment of GITHUB_REF (for pull requests this is
    # the last segment of the PR ref, not a branch name).
    if [ -n "${{ github.event.release.target_commitish }}" ]; then
      BRANCH="${{ github.event.release.target_commitish }}"
    else
      BRANCH="${GITHUB_REF##*/}"
    fi
    echo "GITHUB_BRANCH=${BRANCH}" >> "$GITHUB_ENV"

    # Commit was for main/release branch, build a new version
    if [ "$BRANCH" = "master" ] || [ "$BRANCH" = "main" ]; then
      # NOTE(review): the version is read from gui/package.json - confirm that
      # this file exists in this repository, otherwise this step fails here.
      version="$(jq -r .version gui/package.json)"
      # Use parameter expansion here: "$(version)" would try to execute a
      # command named `version` and export an empty VERSION.
      echo "VERSION=${version}" >> "$GITHUB_ENV"
      # Tag with "latest" plus every prefix of the version, e.g. 1.2.3 gives
      # latest,1.2.3,1.2,1. The loop stops once stripping the last ".component"
      # no longer changes the string.
      tags="latest"
      oldversion=""
      while [ "${oldversion}" != "${version}" ]; do
        oldversion="${version}"
        tags="${tags},${version}"
        version="${version%.*}"
      done
      echo "TAGS=${tags}" >> "$GITHUB_ENV"
    else
      # Any other branch: the branch name doubles as version and as the only tag.
      echo "VERSION=$BRANCH" >> "$GITHUB_ENV"
      echo "TAGS=$BRANCH" >> "$GITHUB_ENV"
    fi
# Build the docker image. This step always runs (it has no `if:` condition) to
# make sure the Dockerfile still works; `no_push: true` below keeps it from
# pushing the result.
- name: Build image
uses: elgohr/Publish-Docker-Github-Action@2.22
# These variables carry the same names as the entries in `buildargs` below;
# presumably the action reads each build-arg value from the environment - TODO confirm.
env:
BRANCH: ${{ env.GITHUB_BRANCH }}
VERSION: ${{ env.VERSION }}
BUILDNUMBER: ${{ github.run_number }}
GITSHA1: ${{ github.sha }}
with:
registry: docker.pkg.github.com
# Image name: <repository owner>/<repository name>/<matrix IMAGE>
name: ${{ github.repository_owner }}/${{ github.event.repository.name }}/${{ matrix.IMAGE }}
dockerfile: Dockerfile-platform
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
context: ${{ matrix.FOLDER }}
# TAGS is exported by the earlier "github branch" step.
tags: "${{ env.TAGS }}"
# NOTE(review): Dockerfile-platform does not appear to declare ARGs with these names - confirm they are used.
buildargs: BRANCH,VERSION,BUILDNUMBER,GITSHA1
no_push: true

# This will publish to Docker Hub.
- name: Publish to Docker Hub
# Skipped for pull requests and whenever the workflow runs in a repository
# other than MAIN_REPO (e.g. a fork).
if: github.event_name != 'pull_request' && github.repository == env.MAIN_REPO
uses: elgohr/Publish-Docker-Github-Action@2.22
# These variables carry the same names as the entries in `buildargs` below;
# presumably the action reads each build-arg value from the environment - TODO confirm.
env:
BRANCH: ${{ env.GITHUB_BRANCH }}
VERSION: ${{ env.VERSION }}
BUILDNUMBER: ${{ github.run_number }}
GITSHA1: ${{ github.sha }}
with:
# No `registry` input is set for this step; presumably the action then targets Docker Hub - TODO confirm.
name: ${{ env.DOCKERHUB_ORG }}/${{ matrix.IMAGE }}
dockerfile: Dockerfile-platform
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
context: ${{ matrix.FOLDER }}
# TAGS is exported by the earlier "github branch" step.
tags: "${{ env.TAGS }}"
buildargs: BRANCH,VERSION,BUILDNUMBER,GITSHA1
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,4 @@ cookies
client/
generated/
**/corpus_count.json
devdocker/
33 changes: 2 additions & 31 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,40 +1,11 @@
FROM python:3-slim

# Install OS dependencies
RUN apt-get -qq update && \
apt-get -qq install --no-install-recommends vim-tiny netcat curl git wget ffmpeg build-essential libsm6 libxext6 libxrender-dev automake libtool pkg-config libsdl-pango-dev libicu-dev libcairo2-dev bc libleptonica-dev && \
apt-get -qq clean autoclean && \
apt-get -qq autoremove && \
rm -rf /var/lib/apt/lists/*

# Build stuff for tesseract
# Based on https://medium.com/quantrium-tech/installing-tesseract-4-on-ubuntu-18-04-b6fcd0cbd78f
RUN curl -L https://github.com/tesseract-ocr/tesseract/archive/refs/tags/4.1.1.tar.gz | tar xvz

ARG MAX_THREADS=""

WORKDIR /tesseract-4.1.1
RUN ./autogen.sh && ./configure && make -j ${MAX_THREADS} && make -j ${MAX_THREADS} install && ldconfig
# Slow! The above line takes 435 seconds on my laptop
RUN make -j ${MAX_THREADS} training && make -j ${MAX_THREADS} training-install
# The above line takes 59 seconds on my laptop

RUN curl -L -o tessdata/eng.traineddata https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata
RUN curl -L -o tessdata/osd.traineddata https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata

ENV TESSDATA_PREFIX=/tesseract-4.1.1/tessdata
#Disable multi-threading
ENV OMP_THREAD_LIMIT=1
FROM classtranscribe/ct-python-platform:staging
# TODO: the base image has no 'latest' tag yet; only the 'staging' tag exists at the moment

# Install Python dependencies
WORKDIR /usr/app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Additional dependencies for brown corpus/stopwords, wordnet
RUN python -m nltk.downloader brown stopwords
RUN python -m nltk.downloader wordnet omw-1.4

# Copy in Python source
COPY . .

Expand Down
44 changes: 44 additions & 0 deletions Dockerfile-platform
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
FROM --platform=linux/amd64 python:3.11-slim-bookworm
# Decord is not available on ARM64 (e.g. Apple-silicon Macs), so the image is forced to amd64.
# Note: the decord build instructions linked below are insufficient; they did not
# immediately work with Debian bookworm.
# https://github.com/dmlc/decord#installation


# OS-level dependencies: build toolchain, media/imaging libraries and a few
# command-line utilities. The apt caches and package lists are removed in the
# same layer that created them.
RUN apt-get -qq update \
 && apt-get -qq install --no-install-recommends \
        automake \
        bc \
        build-essential \
        curl \
        ffmpeg \
        git \
        libcairo2-dev \
        libicu-dev \
        libleptonica-dev \
        libsdl-pango-dev \
        libsm6 \
        libtool \
        libxext6 \
        libxrender-dev \
        netcat-openbsd \
        pkg-config \
        vim-tiny \
        wget \
 && apt-get -qq clean autoclean \
 && apt-get -qq autoremove \
 && rm -rf /var/lib/apt/lists/*

# Build Tesseract from source.
# Based on https://medium.com/quantrium-tech/installing-tesseract-4-on-ubuntu-18-04-b6fcd0cbd78f
#
# The release is named once so that the download URL, the build directory and
# TESSDATA_PREFIX cannot drift apart. Override with --build-arg TESSERACT_VERSION=...
ARG TESSERACT_VERSION="4.1.3"

# Download to a file instead of piping curl into tar, so that every step's exit
# status is checked; -f makes curl fail on an HTTP error response.
# The archive is unpacked under / as /tesseract-<version>.
RUN curl -fsSL -o /tmp/tesseract.tar.gz "https://github.com/tesseract-ocr/tesseract/archive/refs/tags/${TESSERACT_VERSION}.tar.gz" && \
    tar -xzf /tmp/tesseract.tar.gz -C / && \
    rm /tmp/tesseract.tar.gz

WORKDIR /tesseract-${TESSERACT_VERSION}

# On an M1 MacBook (with 6 GB RAM assigned to Docker and 3 GB swap) cross-compiling fails if all 8 CPU cores are used.
# MAX_THREADS is deliberately left unquoted in the make calls below, so an empty
# value still produces a plain `make -j`.
ARG MAX_THREADS="2"

RUN ./autogen.sh && ./configure && make -j ${MAX_THREADS} && make -j ${MAX_THREADS} install && ldconfig
# Slow! The above line takes 435 seconds on my laptop (1590.8s on an M1 cross-compiling to amd64)
RUN make -j ${MAX_THREADS} training && make -j ${MAX_THREADS} training-install
# The above line takes 59 seconds on my laptop (127.3s on an M1 cross-compiling to amd64)

# Language data. -f makes an HTTP error abort the build instead of silently
# saving the error page as a .traineddata file.
# NOTE(review): these URLs track the `main` branch of tessdata and are not pinned to a release.
RUN curl -fsSL -o tessdata/eng.traineddata https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata && \
    curl -fsSL -o tessdata/osd.traineddata https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata

# TESSDATA_PREFIX points at the directory the .traineddata files were saved to above.
# OMP_THREAD_LIMIT=1 disables multi-threading.
ENV TESSDATA_PREFIX=/tesseract-${TESSERACT_VERSION}/tessdata \
    OMP_THREAD_LIMIT=1

# Install Python dependencies
WORKDIR /usr/app
# Only the requirements file is copied, so this layer is rebuilt only when the
# pinned platform dependencies change.
COPY requirements-platform.txt .
# --no-cache-dir on both pip invocations keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements-platform.txt
# Pre-fetch the NLTK data sets (brown corpus, stopwords, wordnet, omw-1.4) at build time.
RUN python -m nltk.downloader brown stopwords wordnet omw-1.4



19 changes: 14 additions & 5 deletions RUNEVERYTHING.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
ClassTranscribe consists as a set of docker images. These instructions walk through setting everything to run locally, including the database, all of the backend projects and the frontend too. You will need to install Docker.
ClassTranscribe consists of a set of Docker images. These instructions walk through setting everything up to run locally, including the database, all of the backend projects, and the frontend.

M1 Macs are not currently supported.
## Set up Docker correctly!

You will need to install Docker by following the official Docker [instructions](https://docs.docker.com/engine/install/)

Did you install Ubuntu's snap version of Docker? The overlay filesystem won't work! (You might see `Permission denied:` filesystem errors when building from a Dockerfile.) If so, purge the snap with ```sudo snap remove --purge docker``` and follow the official Docker install instructions instead.

If your machine has enough memory, we recommend assigning 6 GB of RAM to Docker.

M1 Macs are not natively supported but can build and run AMD64 images.

## Setting up ClassTranscribe

Expand Down Expand Up @@ -40,14 +48,15 @@ Pull the pre-made docker images from docker hub
```sh
docker compose pull
```
To save time you can use the premade images for api, frontend and pyapi already on dockerhub.
To also build these projects uncomment the build lines in docker-compose.override.yml
To save time, you can use the pre-made images for the api, frontend and pyapi projects that are already on Docker Hub.
Look at docker-compose.override.yml: if you want to use the pre-made images, comment out the build lines.

```yml
#build:
# context: ../WebAPI
# dockerfile: ./pythonrpcserver.Dockerfile

```

Build the project(s). This will take more than 10 minutes, especially the first time.
```sh
docker compose build
Expand Down
10 changes: 10 additions & 0 deletions requirements-platform.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

# ai/sci tools
numpy==1.23.5
pytesseract==0.3.9
opencv-python==4.5.5.64
decord==0.6.0
scikit-image==0.19.2
mtcnn-opencv==1.0.2
nltk==3.7
prefixspan==0.5.2
17 changes: 9 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ jsonschema==3.2.0
Jinja2

# ai/sci tools
numpy==1.23.5
pytesseract==0.3.9
opencv-python==4.5.5.64
decord==0.6.0
scikit-image==0.19.2
mtcnn-opencv==1.0.2
nltk==3.7
prefixspan==0.5.2
# These are already preinstalled in the base image (see requirements-platform.txt)
## numpy==1.23.5
## pytesseract==0.3.9
## opencv-python==4.5.5.64
## decord==0.6.0
## scikit-image==0.19.2
## mtcnn-opencv==1.0.2
## nltk==3.7
## prefixspan==0.5.2

# Flask + Connexion + OpenAPI dependencies
Flask==2.0.2
Expand Down

0 comments on commit 4f5b89c

Please sign in to comment.