From 8aa0f2c00c19c1f87628d432fac94504d61de1ac Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Fri, 10 May 2024 20:07:14 +0200
Subject: [PATCH] =?UTF-8?q?ci:=20envoie=20et=20r=C3=A9cup=C3=A8re=20les=20?=
 =?UTF-8?q?RAA=20analys=C3=A9s=20depuis=20un=20bucket=20S3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is ignored by git am / git apply):

* The two scripts under s3/ differ from the ones in the original commit:
  - upload: the Content-Type is a fixed "application/zip". Calling
    "file --mime-type" without --brief prints "<name>: <type>", and that
    whole string was being signed and sent as the header value.
  - both: curl runs with --fail, so an HTTP error is no longer saved as
    the archive (download) or reported as a success (upload).
  - both: the local archive is removed by an EXIT trap, so a failed run
    does not leave it behind in data/.
  - both: the string to sign is built with printf '%s' instead of
    "echo -en", and "set -euo pipefail" catches a failing openssl/base64.
  - both: the nested realpath/dirname substitution is fully quoted.
  - upload: only ./<pref>.zip is removed before zipping (no recursive
    find), and an empty file list is reported before calling zip.
* The "index" lines of the two new files were dropped because the blob
  ids of the original commit no longer match the contents.
* NOTE(review): the leading whitespace of the context lines in the
  .gitlab-ci.yml and Dockerfile-base hunks was reconstructed (2-space
  YAML nesting, 4-space Dockerfile continuation). Confirm it against the
  files at a7f28dc / 433cc25 before applying.

 .gitlab-ci.yml         | 13 ++++---------
 Dockerfile-base        |  2 +-
 s3/download-from-s3.sh | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 s3/upload-to-s3.sh     | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 133 insertions(+), 10 deletions(-)
 create mode 100755 s3/download-from-s3.sh
 create mode 100755 s3/upload-to-s3.sh

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a7f28dc..982f466 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -62,7 +62,6 @@ pep8:
       - lib/
       - pyvenv.cfg
   script:
-    - source bin/activate
     - make lint
   allow_failure: true
   rules:
@@ -75,20 +74,16 @@ pep8:
     - unprivileged
   needs: [install]
   script:
-    - source bin/activate
-    - pip install --upgrade -r requirements.txt
+    - s3/download-from-s3.sh "${PREF}" "${S3_KEY}" "${S3_SECRET}" "${S3_HOST}" "${S3_BUCKET}" || true
     - /etc/init.d/tor start
-    - python ./cli.py "${PREF}"
+    - make "${PREF}"
+    - s3/upload-to-s3.sh "${PREF}" "${S3_KEY}" "${S3_SECRET}" "${S3_HOST}" "${S3_BUCKET}" || true
   cache:
-    key: $CI_COMMIT_REF_SLUG-$CI_JOB_NAME_SLUG
-    fallback_keys:
-      - $CI_COMMIT_REF_SLUG
+    key: $CI_COMMIT_REF_SLUG
     paths:
       - bin/
       - lib/
       - pyvenv.cfg
-      - data/${PREF}/raa/*.txt
-      - data/${PREF}/raa/*.json
   artifacts:
     paths:
       - data/${PREF}/raa/*.txt
diff --git a/Dockerfile-base b/Dockerfile-base
index 433cc25..9d9b932 100644
--- a/Dockerfile-base
+++ b/Dockerfile-base
@@ -4,7 +4,7 @@ ENV DEBIAN_FRONTEND="noninteractive"
 
 RUN apt-get update && \
     apt-get dist-upgrade -y && \
-    apt-get install --no-install-recommends -y python3 python3-virtualenv chromium-driver make xauth xvfb tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra ocrmypdf curl unzip tor && \
+    apt-get install --no-install-recommends -y python3 python3-virtualenv chromium-driver make xauth xvfb tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra ocrmypdf curl unzip zip tor && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists && \
     ln -s /usr/bin/python3 /usr/bin/python && \
diff --git a/s3/download-from-s3.sh b/s3/download-from-s3.sh
new file mode 100755
--- /dev/null
+++ b/s3/download-from-s3.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+#
+# Download <pref>.zip from an S3-compatible bucket and extract it into data/.
+#
+# Usage: download-from-s3.sh <pref> <s3_key> <s3_secret> <s3_host> <s3_bucket>
+#
+# The request is signed with HMAC-SHA1 over the string built below and sent in
+# an "Authorization: AWS <key>:<signature>" header.
+
+set -euo pipefail
+
+pref="${1:-}"
+s3_key="${2:-}"
+s3_secret="${3:-}"
+s3_host="${4:-}"
+s3_bucket="${5:-}"
+
+if [[ -z "$pref" || -z "$s3_key" || -z "$s3_secret" || -z "$s3_host" || -z "$s3_bucket" ]]; then
+  echo "Usage: ${0} <pref> <s3_key> <s3_secret> <s3_host> <s3_bucket>" >&2
+  exit 1
+fi
+
+script_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
+data_dir="${script_dir}/../data"
+mkdir -p "${data_dir}"
+cd "${data_dir}"
+
+file="${pref}.zip"
+
+# Remove the (possibly partial) archive on every exit path.
+trap 'rm -f -- "${file}"' EXIT
+
+echo "Downloading..."
+
+ressource="/${s3_bucket}/${file}"
+content_type="application/octet-stream"
+date=$(date --utc -R)
+
+# printf '%s' keeps backslashes in the values literal, unlike "echo -e".
+signature=$(printf 'GET\n\n%s\n%s\n%s' "${content_type}" "${date}" "${ressource}" \
+  | openssl sha1 -hmac "${s3_secret}" -binary \
+  | base64)
+
+# --fail: an HTTP error body (e.g. when no archive has been uploaded yet) must
+# not be written to disk as if it were the archive.
+curl -X GET \
+  --fail \
+  --silent \
+  --show-error \
+  -H "Date: ${date}" \
+  -H "Content-Type: ${content_type}" \
+  -H "Authorization: AWS ${s3_key}:${signature}" \
+  -o "${file}" \
+  "${s3_host}${ressource}"
+
+unzip -o "${file}" > /dev/null
diff --git a/s3/upload-to-s3.sh b/s3/upload-to-s3.sh
new file mode 100755
--- /dev/null
+++ b/s3/upload-to-s3.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+#
+# Zip data/<pref>/raa/*.txt and *.json into <pref>.zip and upload the archive
+# to an S3-compatible bucket.
+#
+# Usage: upload-to-s3.sh <pref> <s3_key> <s3_secret> <s3_host> <s3_bucket>
+#
+# The request is signed with HMAC-SHA1 over the string built below and sent in
+# an "Authorization: AWS <key>:<signature>" header.
+
+set -euo pipefail
+
+pref="${1:-}"
+s3_key="${2:-}"
+s3_secret="${3:-}"
+s3_host="${4:-}"
+s3_bucket="${5:-}"
+
+if [[ -z "$pref" || -z "$s3_key" || -z "$s3_secret" || -z "$s3_host" || -z "$s3_bucket" ]]; then
+  echo "Usage: ${0} <pref> <s3_key> <s3_secret> <s3_host> <s3_bucket>" >&2
+  exit 1
+fi
+
+script_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
+cd "${script_dir}/../data"
+
+if [[ ! -d "${pref}/raa" ]]; then
+  echo "Missing directory: data/${pref}/raa" >&2
+  exit 1
+fi
+
+# Unmatched patterns expand to nothing instead of being passed to zip as-is.
+shopt -s nullglob
+inputs=("${pref}/raa/"*.txt "${pref}/raa/"*.json)
+shopt -u nullglob
+
+if (( ${#inputs[@]} == 0 )); then
+  echo "Nothing to upload: no .txt or .json file in data/${pref}/raa" >&2
+  exit 1
+fi
+
+file="${pref}.zip"
+
+# Remove the archive on every exit path.
+trap 'rm -f -- "${file}"' EXIT
+
+# zip would otherwise update a stale archive left by a previous run.
+rm -f -- "${file}"
+zip "${file}" "${inputs[@]}" > /dev/null
+
+echo "Uploading..."
+
+ressource="/${s3_bucket}/${file}"
+# Fixed value: the archive is always produced by zip above, and
+# "file --mime-type" without --brief would prefix the type with the file name.
+content_type="application/zip"
+date=$(date --utc -R)
+
+signature=$(printf 'PUT\n\n%s\n%s\n%s' "${content_type}" "${date}" "${ressource}" \
+  | openssl sha1 -hmac "${s3_secret}" -binary \
+  | base64)
+
+# --fail: make a rejected upload exit non-zero.
+curl -X PUT \
+  --fail \
+  --silent \
+  --show-error \
+  -T "${file}" \
+  -H "Date: ${date}" \
+  -H "Content-Type: ${content_type}" \
+  -H "Authorization: AWS ${s3_key}:${signature}" \
+  "${s3_host}${ressource}"
-- 
GitLab