Skip to content

Commit

Permalink
add new dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Nov 22, 2024
1 parent f4d6245 commit b0179e5
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 0 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/ci-build-manual-eval.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Manually triggered workflow: checks that the project builds with Gradle,
# then builds Dockerfile.evaluation and pushes it to Docker Hub as
# lfoppiano/grobid-evaluation.
name: Build and push a full docker image

on:
  workflow_dispatch:
    inputs:
      custom_tag:
        type: string
        description: Docker image tag
        required: true
        default: "latest"

jobs:
  # Gate job: the Docker build below only starts if the Gradle build succeeds.
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: '17.0.10+7'
          distribution: 'temurin'
          cache: 'gradle'
      - name: Build with Gradle
        run: ./gradlew build -x test

  docker-build-full:
    needs: [ build ]
    runs-on: ubuntu-latest

    steps:
      # Remove preinstalled toolchains from the runner before the image build.
      - name: Create more disk space
        run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - uses: actions/checkout@v4
      - name: Build and push
        id: docker_build
        uses: mr-smithers-excellent/docker-build-push@v5
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME_LFOPPIANO }}
          password: ${{ secrets.DOCKERHUB_TOKEN_LFOPPIANO }}
          image: lfoppiano/grobid-evaluation
          registry: docker.io
          pushImage: true
          tags: latest, ${{ github.event.inputs.custom_tag }}
          dockerfile: Dockerfile.evaluation
      # docker-build-push exposes imageFullName / imageName / tags (no digest
      # output), so report those instead of echoing an empty value.
      - name: Image reference
        run: echo "Pushed ${{ steps.docker_build.outputs.imageFullName }} (tags ${{ steps.docker_build.outputs.tags }})"
67 changes: 67 additions & 0 deletions Dockerfile.evaluation
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
## Grobid evaluation image
#
# Image used to run the GROBID end-to-end evaluation, see
# https://grobid.readthedocs.io/en/latest/End-to-end-evaluation/
#
# Example run of the default evaluation command (image name as published by CI):
#   docker run -t --rm --init lfoppiano/grobid-evaluation:latest

FROM lfoppiano/grobid:0.8.1 AS runtime

# setting locale is likely useless but to be sure
ENV LANG=C.UTF-8

# Root is required for the package installation below.
USER root

# unzip and wget are needed to fetch and unpack the evaluation datasets.
# The apt lists are removed in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get -y --no-install-recommends install \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /opt/grobid

# Gradle wrapper directory, wrapper script and build definition files
COPY gradle/ ./gradle/
COPY gradlew gradle.properties build.gradle settings.gradle ./

# Module sources
COPY grobid-core/ ./grobid-core/
COPY grobid-service/ ./grobid-service/
COPY grobid-trainer/ ./grobid-trainer/

# Use the "full" (DL-powered) configuration file as config.yaml
COPY grobid-home/config/grobid-full.yaml grobid-home/config/config.yaml

# Paths are relative to the WORKDIR set above (/opt/grobid): remove the
# *-with_ELMo model directories and create the dataset directory.
RUN rm -rf grobid-home/models/*-with_ELMo \
    && mkdir evaluation

# Download evaluation data: each entry is "<zenodo record id>/<archive name>".
# Every archive is unpacked into a directory of the same name, and all the
# zip files are deleted within this same layer.
WORKDIR /opt/grobid/evaluation
RUN for corpus in \
        3873702/biorxiv-10k-test-2000 \
        7708580/eLife_984 \
        7708580/PLOS_1000 \
        7708580/PMC_sample_1943 \
    ; do \
        record="${corpus%%/*}" \
        && name="${corpus##*/}" \
        && wget "https://zenodo.org/records/${record}/files/${name}.zip" -O "${name}.zip" \
        && unzip "${name}.zip" -d "${name}" \
        || exit 1; \
    done \
    && rm *.zip

#RUN wget -q https://zenodo.org/records/7708580/files/PMC_sample_1943.zip -O PMC_sample_1943.zip \
# && unzip PMC_sample_1943.zip -d PMC_sample_1943 \
# && rm *.zip

VOLUME ["/opt/grobid/grobid-home/tmp"]

WORKDIR /opt/grobid

# Only the last CMD of a Dockerfile takes effect, so the four evaluations are
# chained in a single command. They run one after the other and the container
# stops at the first failing evaluation (set -e).
# To evaluate a single dataset, override the command at run time, e.g.:
#   docker run ... <image> ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PLOS_1000 -Prun=1 -PfileRatio=1
CMD ["/bin/sh", "-c", "set -e; for corpus in PMC_sample_1943 biorxiv-10k-test-2000 eLife_984 PLOS_1000; do ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/$corpus -Prun=1 -PfileRatio=1; done"]

# Version recorded in the image metadata. The default mirrors the tag of the
# base image used in FROM; pass --build-arg GROBID_VERSION=<version> when the
# copied sources correspond to a different version.
ARG GROBID_VERSION=0.8.1

# Image metadata describing this evaluation image.
LABEL \
    authors="The contributors" \
    org.label-schema.name="grobid-evaluation" \
    org.label-schema.description="Image running the GROBID end-to-end evaluation" \
    org.label-schema.url="https://grobid.readthedocs.io/en/latest/End-to-end-evaluation/" \
    org.label-schema.version="${GROBID_VERSION}"

0 comments on commit b0179e5

Please sign in to comment.