diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a7f28dcde363e9150bc17dbd2873824a9e3b5b27..982f4664acefbfca5f75fcecef9f9134e3dd862c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -62,7 +62,6 @@ pep8:
       - lib/
       - pyvenv.cfg
   script:
-    - source bin/activate
     - make lint
   allow_failure: true
   rules:
@@ -75,20 +74,16 @@ pep8:
     - unprivileged
   needs: [install]
   script:
-    - source bin/activate
-    - pip install --upgrade -r requirements.txt
+    - s3/download-from-s3.sh "${PREF}" "${S3_KEY}" "${S3_SECRET}" "${S3_HOST}" "${S3_BUCKET}" || true
     - /etc/init.d/tor start
-    - python ./cli.py "${PREF}"
+    - make "${PREF}"
+    - s3/upload-to-s3.sh "${PREF}" "${S3_KEY}" "${S3_SECRET}" "${S3_HOST}" "${S3_BUCKET}" || true
   cache:
-    key: $CI_COMMIT_REF_SLUG-$CI_JOB_NAME_SLUG
-    fallback_keys:
-      - $CI_COMMIT_REF_SLUG
+    key: $CI_COMMIT_REF_SLUG
     paths:
       - bin/
       - lib/
       - pyvenv.cfg
-      - data/${PREF}/raa/*.txt
-      - data/${PREF}/raa/*.json
   artifacts:
     paths:
       - data/${PREF}/raa/*.txt
diff --git a/Dockerfile-base b/Dockerfile-base
index 433cc25108de0d39ecb821c78fe1ae7cee401c4b..9d9b9322065a69c3407cd18cb5d737741c1d913e 100644
--- a/Dockerfile-base
+++ b/Dockerfile-base
@@ -4,7 +4,7 @@ ENV DEBIAN_FRONTEND="noninteractive"
 
 RUN apt-get update && \
     apt-get dist-upgrade -y && \
-    apt-get install --no-install-recommends -y python3 python3-virtualenv chromium-driver make xauth xvfb tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra ocrmypdf curl unzip tor && \
+    apt-get install --no-install-recommends -y python3 python3-virtualenv chromium-driver make xauth xvfb tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra ocrmypdf curl unzip zip tor && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists && \
     ln -s /usr/bin/python3 /usr/bin/python && \
diff --git a/s3/download-from-s3.sh b/s3/download-from-s3.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1ebc53d820244f2d2f5117c9976c7f4a5787f31b
--- /dev/null
+++ b/s3/download-from-s3.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Restore the archived data of one prefecture from an S3-compatible bucket.
+#
+# Fetches <pref>.zip with an AWS signature v2 request and unpacks it into the
+# data/ directory that sits next to this script's directory.
+#
+# Usage: download-from-s3.sh <pref> <s3_key> <s3_secret> <s3_host> <s3_bucket>
+
+set -euo pipefail
+
+pref="${1:-}"
+s3_key="${2:-}"
+s3_secret="${3:-}"
+s3_host="${4:-}"
+s3_bucket="${5:-}"
+
+if [[ -z "$pref" || -z "$s3_key" || -z "$s3_secret" || -z "$s3_host" || -z "$s3_bucket" ]]; then
+  echo "Usage: ${0} <pref> <s3_key> <s3_secret> <s3_host> <s3_bucket>" >&2
+  exit 1
+fi
+
+data_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/../data"
+mkdir -p "${data_dir}"
+cd "${data_dir}"
+
+file="${pref}.zip"
+# Drop the archive on every exit path, including a failed download or unzip.
+trap 'rm -f -- "${file}"' EXIT
+
+echo "Downloading..."
+
+ressource="/${s3_bucket}/${file}"
+content_type="application/octet-stream"
+date=$(date --utc -R)
+
+# String to sign: verb, Content-MD5 (empty), Content-Type, Date, resource.
+# printf '%s' keeps backslashes in the values literal, unlike echo -e.
+signature=$(printf 'GET\n\n%s\n%s\n%s' "${content_type}" "${date}" "${ressource}" \
+  | openssl sha1 -hmac "${s3_secret}" -binary \
+  | base64)
+
+# --fail turns an HTTP error into a non-zero exit instead of saving the error
+# response body as if it were the archive.
+curl -X GET \
+  --silent \
+  --fail \
+  -H "Date: ${date}" \
+  -H "Content-Type: ${content_type}" \
+  -H "Authorization: AWS ${s3_key}:${signature}" \
+  "${s3_host}${ressource}" \
+  -o "${file}"
+
+unzip -o "${file}" > /dev/null
diff --git a/s3/upload-to-s3.sh b/s3/upload-to-s3.sh
new file mode 100755
index 0000000000000000000000000000000000000000..cfa2f67ad13cbf74b8ab96e7fd56fcec1e592bf7
--- /dev/null
+++ b/s3/upload-to-s3.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+#
+# Archive the data of one prefecture and publish it to an S3-compatible bucket.
+#
+# Zips data/<pref>/raa/*.txt and *.json into <pref>.zip and uploads it with an
+# AWS signature v2 PUT request.
+#
+# Usage: upload-to-s3.sh <pref> <s3_key> <s3_secret> <s3_host> <s3_bucket>
+
+set -euo pipefail
+
+pref="${1:-}"
+s3_key="${2:-}"
+s3_secret="${3:-}"
+s3_host="${4:-}"
+s3_bucket="${5:-}"
+
+if [[ -z "$pref" || -z "$s3_key" || -z "$s3_secret" || -z "$s3_host" || -z "$s3_bucket" ]]; then
+  echo "Usage: ${0} <pref> <s3_key> <s3_secret> <s3_host> <s3_bucket>" >&2
+  exit 1
+fi
+
+data_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/../data"
+# Entering the raa directory first aborts early when this prefecture has no data.
+cd "${data_dir}/${pref}/raa"
+cd "../../"
+
+file="${pref}.zip"
+# Drop the local archive on every exit path, including a failed upload.
+trap 'rm -f -- "${file}"' EXIT
+# zip would update a leftover archive in place, so remove stale copies first.
+find . -name "${file}" -type f -delete
+
+# Expand the globs into an array so an unmatched pattern contributes nothing
+# instead of being handed to zip as a literal name.
+shopt -s nullglob
+members=("${pref}/raa/"*.txt "${pref}/raa/"*.json)
+shopt -u nullglob
+if (( ${#members[@]} == 0 )); then
+  echo "Nothing to upload for ${pref}" >&2
+  exit 1
+fi
+zip "${file}" "${members[@]}" > /dev/null
+
+echo "Uploading..."
+
+ressource="/${s3_bucket}/${file}"
+# --brief prints only the MIME type; without it the output is
+# "<name>: <type>", which is not a valid Content-Type value.
+content_type=$(file --brief --mime-type "${file}") || content_type="application/zip"
+date=$(date --utc -R)
+
+# String to sign: verb, Content-MD5 (empty), Content-Type, Date, resource.
+signature=$(printf 'PUT\n\n%s\n%s\n%s' "${content_type}" "${date}" "${ressource}" \
+  | openssl sha1 -hmac "${s3_secret}" -binary \
+  | base64)
+
+# --fail turns an HTTP error into a non-zero exit so a rejected upload is noticed.
+curl -X PUT \
+  -T "${file}" \
+  --fail \
+  -H "Date: ${date}" \
+  -H "Content-Type: ${content_type}" \
+  -H "Authorization: AWS ${s3_key}:${signature}" \
+  "${s3_host}${ressource}"