Skip to content
Extraits de code Groupes Projets

Comparer les révisions

Les modifications sont affichées comme si la révision source était fusionnée avec la révision cible. En savoir plus sur la comparaison des révisions.

Source

Sélectionner le projet cible
No results found

Cible

Sélectionner le projet cible
  • la-quadrature-du-net/Attrap
  • foggyfrog/Attrap
  • skhwiz/Attrap
  • precambrien/Attrap
  • ketsapiwiq/Attrap
  • Joseki/Attrap
  • kr1p/attrap-pref-12
  • kr1p/attrap-pref-46
  • kr1p/attrap-pi
  • Guinness/Attrap
  • astroidgritty/attrap-pref-84
  • davinov/Attrap
  • maettellite/attrap-pref-01
  • m242/Attrap
  • multi/Attrap
  • mverdeil/Attrap
  • olpo/Attrap
17 résultats
Afficher les modifications
Validations sur la source (359)
__pycache__/
bin/
lib/
lib64
data/
pyvenv.cfg
output.log
output_*.log
*.patch
CACHEDIR.TAG
# Image used by any job that does not declare its own `image`.
image: debian:bookworm

# Variables available to every job in the pipeline.
variables:
  DEBIAN_FRONTEND: noninteractive
  IMAGE_NAME: $CI_REGISTRY_IMAGE/base

# Pipeline stages, listed in execution order.
stages:
  - docker
  - install
  - lint
  - test
# Builds the base image from Dockerfile-base and pushes it to the project
# registry as ${IMAGE_NAME}:latest.
docker:
  stage: docker
  image: docker:20.10.17
  tags:
    - dind
  variables:
    DOCKER_DRIVER: "overlay2"
    DOCKER_TLS_CERTDIR: "/certs"
  services:
    - docker:20.10.17-dind
  before_script:
    - docker info
    # The password is piped on stdin so it never appears in the process
    # arguments.
    - echo "${CI_REGISTRY_PASSWORD}" | docker login -u "${CI_REGISTRY_USER}" --password-stdin "${CI_REGISTRY}"
  script:
    - docker build -t ${IMAGE_NAME}:latest -f Dockerfile-base .
    - docker push ${IMAGE_NAME}:latest
  rules:
    # Non-scheduled pipelines on the default branch that change Dockerfile-base.
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH && $CI_PIPELINE_SOURCE != "schedule"
      changes:
        paths:
          - Dockerfile-base
    # Any pipeline where COMPILE_DOCKER is set.
    - if: $COMPILE_DOCKER
# Creates a virtualenv in the checkout and installs requirements.txt into it.
# The virtualenv files are cached per branch so later jobs can reuse them.
install:
  stage: install
  image: registry.git.laquadrature.net/la-quadrature-du-net/attrap/base:latest
  tags:
    - unprivileged
  rules:
    # Only when COMPILE_DOCKER is not set.
    - if: $COMPILE_DOCKER == null
  cache:
    key: $CI_COMMIT_REF_SLUG
    paths:
      - bin/
      - lib/
      - pyvenv.cfg
  script:
    - virtualenv --python=/usr/bin/python3 .
    - source bin/activate
    - pip3 install -r requirements.txt
# Runs `make lint` on merge requests and pushes. A failure is reported but does
# not fail the pipeline.
pep8:
  stage: lint
  image: registry.git.laquadrature.net/la-quadrature-du-net/attrap/base:latest
  tags:
    - unprivileged
  needs:
    - install
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event" || $CI_PIPELINE_SOURCE == "push"
  allow_failure: true
  cache:
    key: $CI_COMMIT_REF_SLUG
    paths:
      - bin/
      - lib/
      - pyvenv.cfg
  script:
    - make lint
# Hidden template job, extended by every test_* job. The extending job sets
# PREF, which selects the make target and the data directory to sync.
.default_pref:
  stage: test
  image: registry.git.laquadrature.net/la-quadrature-du-net/attrap/base:latest
  tags:
    - unprivileged
  needs: [install]
  script:
    # Both S3 transfers are best-effort: `|| true` keeps the job going if the
    # script fails.
    - misc/download-from-s3.sh "${PREF}" "${S3_KEY}" "${S3_SECRET}" "${S3_HOST}" "${S3_BUCKET}" data/ || true
    - source bin/activate
    - /etc/init.d/tor start
    - make "${PREF}"
    - misc/upload-to-s3.sh "${PREF}" "${S3_KEY}" "${S3_SECRET}" "${S3_HOST}" "${S3_BUCKET}" data/ || true
  cache:
    key: $CI_COMMIT_REF_SLUG
    paths:
      - data/
      - bin/
      - lib/
      - pyvenv.cfg
  artifacts:
    paths:
      - output.log
      - data/${PREF}/raa/*.txt
      - data/${PREF}/raa/*.json
      - output_${PREF}.log
    expire_in: 2 days
  rules:
    # Scheduled pipelines only, and only when COMPILE_DOCKER is not set.
    - if: $CI_PIPELINE_SOURCE == "schedule" && $COMPILE_DOCKER == null
# One job per administration. Each job extends .default_pref and only sets
# PREF.
test_ppparis:
  extends: .default_pref
  variables:
    PREF: "ppparis"
test_pref01:
  extends: .default_pref
  variables:
    PREF: "pref01"
test_pref02:
  extends: .default_pref
  variables:
    PREF: "pref02"
test_pref03:
  extends: .default_pref
  variables:
    PREF: "pref03"
test_pref04:
  extends: .default_pref
  variables:
    PREF: "pref04"
test_pref05:
  extends: .default_pref
  variables:
    PREF: "pref05"
test_pref06:
  extends: .default_pref
  variables:
    PREF: "pref06"
test_pref09:
  extends: .default_pref
  variables:
    PREF: "pref09"
test_pref10:
  extends: .default_pref
  variables:
    PREF: "pref10"
test_pref11:
  extends: .default_pref
  variables:
    PREF: "pref11"
test_pref13:
  extends: .default_pref
  variables:
    PREF: "pref13"
test_pref2a:
  extends: .default_pref
  variables:
    PREF: "pref2a"
test_pref2b:
  extends: .default_pref
  variables:
    PREF: "pref2b"
test_pref25:
  extends: .default_pref
  variables:
    PREF: "pref25"
test_pref29:
  extends: .default_pref
  variables:
    PREF: "pref29"
test_pref30:
  extends: .default_pref
  variables:
    PREF: "pref30"
test_pref31:
  extends: .default_pref
  variables:
    PREF: "pref31"
test_pref33:
  extends: .default_pref
  variables:
    PREF: "pref33"
test_pref34:
  extends: .default_pref
  variables:
    PREF: "pref34"
test_pref35:
  extends: .default_pref
  variables:
    PREF: "pref35"
test_pref38:
  extends: .default_pref
  variables:
    PREF: "pref38"
test_pref39:
  extends: .default_pref
  variables:
    PREF: "pref39"
test_pref42:
  extends: .default_pref
  variables:
    PREF: "pref42"
test_pref44:
  extends: .default_pref
  variables:
    PREF: "pref44"
test_pref49:
  extends: .default_pref
  variables:
    PREF: "pref49"
test_pref50:
  extends: .default_pref
  variables:
    PREF: "pref50"
test_pref52:
  extends: .default_pref
  variables:
    PREF: "pref52"
test_pref54:
  extends: .default_pref
  variables:
    PREF: "pref54"
test_pref55:
  extends: .default_pref
  variables:
    PREF: "pref55"
test_pref59:
  extends: .default_pref
  variables:
    PREF: "pref59"
test_pref61:
  extends: .default_pref
  variables:
    PREF: "pref61"
test_pref62:
  extends: .default_pref
  variables:
    PREF: "pref62"
test_pref63:
  extends: .default_pref
  variables:
    PREF: "pref63"
test_pref64:
  extends: .default_pref
  variables:
    PREF: "pref64"
test_pref65:
  extends: .default_pref
  variables:
    PREF: "pref65"
test_pref66:
  extends: .default_pref
  variables:
    PREF: "pref66"
test_pref69:
  extends: .default_pref
  variables:
    PREF: "pref69"
test_pref73:
  extends: .default_pref
  variables:
    PREF: "pref73"
test_pref75:
  extends: .default_pref
  variables:
    PREF: "pref75"
test_pref77:
  extends: .default_pref
  variables:
    PREF: "pref77"
test_pref80:
  extends: .default_pref
  variables:
    PREF: "pref80"
test_pref81:
  extends: .default_pref
  variables:
    PREF: "pref81"
test_pref83:
  extends: .default_pref
  variables:
    PREF: "pref83"
test_pref87:
  extends: .default_pref
  variables:
    PREF: "pref87"
test_pref91:
  extends: .default_pref
  variables:
    PREF: "pref91"
test_pref92:
  extends: .default_pref
  variables:
    PREF: "pref92"
test_pref93:
  extends: .default_pref
  variables:
    PREF: "pref93"
test_pref94:
  extends: .default_pref
  variables:
    PREF: "pref94"
test_pref976:
  extends: .default_pref
  variables:
    PREF: "pref976"
test_prefbretagne:
  extends: .default_pref
  variables:
    PREF: "prefbretagne"
test_prefidf:
  extends: .default_pref
  variables:
    PREF: "prefidf"
test_prefpaca:
  extends: .default_pref
  variables:
    PREF: "prefpaca"
Ce diff est replié.
import datetime
from bs4 import BeautifulSoup
from urllib.parse import unquote
from Attrap import Attrap
class Attrap_ppparis(Attrap):
    """Scraper for the decrees listed on the Paris police prefecture website.

    Loads the listing page, turns every linked PDF into an ``Attrap.RAA`` and
    passes the result to the ``parse_raa`` and ``mailer`` methods inherited
    from ``Attrap``.
    """

    # Site configuration
    hostname = 'https://www.prefecturedepolice.interieur.gouv.fr'
    raa_page = f'{hostname}/actualites-et-presse/arretes/accueil-arretes'
    # Passed to get_session() along with the page URL.
    # NOTE(review): presumably identifies the element to wait for — confirm
    # against Attrap.get_session.
    __WAIT_ELEMENT = 'block-decree-list-block'
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
    full_name = 'Préfecture de police de Paris'
    short_code = 'ppparis'
    timezone = 'Europe/Paris'

    def __init__(self, data_dir):
        """Initialise the base class with ``data_dir`` and this class's user agent."""
        super().__init__(data_dir, self.user_agent)

    def get_raa(self, keywords):
        """Fetch the listing page, extract its entries, parse them, then call the mailer.

        Args:
            keywords: forwarded unchanged to ``parse_raa``.
        """
        page_content = self.get_session(self.raa_page, self.__WAIT_ELEMENT, 6)
        raa_elements = self.get_raa_elements(page_content)
        self.parse_raa(raa_elements, keywords)
        self.mailer()

    def get_raa_elements(self, page_content):
        """Build one ``Attrap.RAA`` per PDF link found in ``page_content``.

        Args:
            page_content: HTML of the listing page.

        Returns:
            list of ``Attrap.RAA``, in document order.

        Raises:
            AttributeError: if a PDF link has no ``span`` or no
                ``div.field--type-datetime`` child.
            ValueError: if the date text is not in ``dd/mm/YYYY`` format.
        """
        soup = BeautifulSoup(page_content, 'html.parser')
        elements = []

        # Only anchors whose href ends in .pdf are kept.
        for a in soup.find_all('a', href=True):
            href = a['href']
            if not href.endswith('.pdf'):
                continue

            # Site-relative links are made absolute using the configured
            # hostname rather than a second copy of the URL.
            url = unquote(self.hostname + href if href.startswith('/') else href)

            name = a.find('span').get_text()
            date_text = a.find('div', class_="field--type-datetime").get_text().strip()
            date = datetime.datetime.strptime(date_text, '%d/%m/%Y')

            elements.append(Attrap.RAA(url, date, name, timezone=self.timezone))
        return elements
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref01(Attrap_prefdpt):
    """Settings for the Ain prefecture (short code ``pref01``)."""

    # Prefecture configuration
    hostname = 'https://www.ain.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-actes-administratifs-RAA'
    full_name = 'Préfecture de l\'Ain'
    short_code = 'pref01'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): this writes into a dict reached through the Attrap_prefdpt
    # base class, not an attribute of this subclass — presumably only one
    # prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = '(?:Recueil|Recueils) (?:des actes administratifs)(?:[ -])*([0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref02(Attrap_prefdpt):
    """Settings for the Aisne prefecture (short code ``pref02``)."""

    # Prefecture configuration
    hostname = 'https://www.aisne.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-Actes-Administratifs'
    full_name = 'Préfecture de l\'Aisne'
    short_code = 'pref02'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): this writes into a dict reached through the Attrap_prefdpt
    # base class, not an attribute of this subclass — presumably only one
    # prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = 'RAA [Aa]nnée ([0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref03(Attrap_prefdpt):
    """Settings for the Allier prefecture (short code ``pref03``)."""

    # Prefecture configuration
    hostname = 'https://www.allier.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-actes-administratifs-arretes'
    full_name = 'Préfecture de l\'Allier'
    short_code = 'pref03'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): this writes into a dict reached through the Attrap_prefdpt
    # base class, not an attribute of this subclass — presumably only one
    # prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = '([0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref04(Attrap_prefdpt):
    """Settings for the Alpes-de-Haute-Provence prefecture (short code ``pref04``)."""

    # Prefecture configuration
    hostname = 'https://www.alpes-de-haute-provence.gouv.fr'
    raa_page = f'{hostname}/Publications/Publications-administratives-et-legales/Recueil-des-Actes-Administratifs'
    full_name = 'Préfecture des Alpes-de-Haute-Provence'
    short_code = 'pref04'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): this writes into a dict reached through the Attrap_prefdpt
    # base class, not an attribute of this subclass — presumably only one
    # prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = '([0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref05(Attrap_prefdpt):
    """Settings for the Hautes-Alpes prefecture (short code ``pref05``)."""

    # Prefecture configuration
    hostname = 'https://www.hautes-alpes.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-actes-administratifs'
    full_name = 'Préfecture des Hautes-Alpes'
    short_code = 'pref05'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): both assignments write into a dict reached through the
    # Attrap_prefdpt base class, not an attribute of this subclass —
    # presumably only one prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = 'Année *([0-9]{4})'
    Attrap_prefdpt.grey_card['regex']['month'] = '([A-Za-zéû]* *[0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref06(Attrap_prefdpt):
    """Settings for the Alpes-Maritimes prefecture (short code ``pref06``)."""

    # Prefecture configuration
    hostname = 'https://www.alpes-maritimes.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-actes-administratifs-RAA'
    full_name = 'Préfecture des Alpes-Maritimes'
    short_code = 'pref06'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): this writes into a dict reached through the Attrap_prefdpt
    # base class, not an attribute of this subclass — presumably only one
    # prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = 'Année *([0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref09(Attrap_prefdpt):
    """Settings for the Ariège prefecture (short code ``pref09``).

    Only the site attributes are set here; no widget setting is overridden.
    """

    # Prefecture configuration
    hostname = 'https://www.ariege.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-actes-administratifs/Recueils-des-Actes-Administratifs-de-l-Ariege-a-partir-du-28-avril-2015'
    full_name = 'Préfecture de l\'Ariège'
    short_code = 'pref09'
    timezone = 'Europe/Paris'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref10(Attrap_prefdpt):
    """Settings for the Aube prefecture (short code ``pref10``).

    ``raa_page`` is a list here: the current RAA page and its archive page.
    """

    # Prefecture configuration
    hostname = 'https://www.aube.gouv.fr'
    raa_page = [
        f'{hostname}/Publications/RAA-Recueil-des-Actes-Administratifs',
        f'{hostname}/Publications/RAA-Recueil-des-Actes-Administratifs/RAA-Archives'
    ]
    full_name = 'Préfecture de l\'Aube'
    short_code = 'pref10'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): the statements below modify objects reached through the
    # Attrap_prefdpt base class (a dict entry and the `widgets` list), not
    # attributes of this subclass — presumably only one prefecture module is
    # loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = 'RAA *([0-9]{4})'

    # Add a custom widget for the links found on the home page.
    Attrap_prefdpt.widgets.append(
        Attrap_prefdpt.DptWidget(
            'homepage_links',
            regex={'year': 'Année *([0-9]{4})'},
            css_path={'title': 'div.fr-text--lead p a.fr-link'}
        )
    )
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref11(Attrap_prefdpt):
    """Settings for the Aude prefecture (short code ``pref11``)."""

    # Prefecture configuration
    hostname = 'https://www.aude.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-Actes-Administratifs-RAA'
    full_name = 'Préfecture de l\'Aude'
    short_code = 'pref11'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): this writes into a dict reached through the Attrap_prefdpt
    # base class, not an attribute of this subclass — presumably only one
    # prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = 'Année *([0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref13(Attrap_prefdpt):
    """Settings for the Bouches-du-Rhône prefecture (short code ``pref13``).

    ``raa_page`` is a list here: the current RAA page and its archive page.
    """

    # Prefecture configuration
    hostname = 'https://www.bouches-du-rhone.gouv.fr'
    raa_page = [
        f'{hostname}/Publications/RAA-et-Archives',
        f'{hostname}/Publications/RAA-et-Archives/Archives-RAA-des-Bouches-du-Rhone'
    ]
    full_name = 'Préfecture des Bouches-du-Rhône'
    short_code = 'pref13'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): both assignments write into a dict reached through the
    # Attrap_prefdpt base class, not an attribute of this subclass —
    # presumably only one prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = 'RAA[- ]*([0-9]{4})'
    Attrap_prefdpt.grey_card['follow_link_on_unrecognised_date'] = False
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref25(Attrap_prefdpt):
    """Settings for the Doubs prefecture (short code ``pref25``)."""

    # Prefecture configuration
    hostname = 'https://www.doubs.gouv.fr'
    raa_page = f'{hostname}/Publications/Publications-Legales/Recueil-des-Actes-Administratifs-RAA'
    full_name = 'Préfecture du Doubs'
    short_code = 'pref25'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): both assignments write into a dict reached through the
    # Attrap_prefdpt base class, not an attribute of this subclass —
    # presumably only one prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = '([0-9]{4})'
    Attrap_prefdpt.grey_card['follow_link_on_unrecognised_date'] = False
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref29(Attrap_prefdpt):
    """Settings for the Finistère prefecture (short code ``pref29``)."""

    # Prefecture configuration
    hostname = 'https://www.finistere.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-actes-administratifs'
    full_name = 'Préfecture du Finistère'
    short_code = 'pref29'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): this writes into a dict reached through the Attrap_prefdpt
    # base class, not an attribute of this subclass — presumably only one
    # prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = '(?:Recueils publiés en ).*([0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref2a(Attrap_prefdpt):
    """Settings for the Corse-du-Sud prefecture (short code ``pref2a``)."""

    # Prefecture configuration
    hostname = 'https://www.corse-du-sud.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-actes-administratifs/Recueil-des-actes-administratifs-de-la-prefecture-de-la-Corse-du-Sud'
    full_name = 'Préfecture de la Corse-du-Sud'
    short_code = 'pref2a'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse (here the white card, not the
    # grey one).
    # NOTE(review): this writes into a dict reached through the Attrap_prefdpt
    # base class, not an attribute of this subclass — presumably only one
    # prefecture module is loaded per process; confirm.
    Attrap_prefdpt.white_card['regex']['year'] = '([0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref2b(Attrap_prefdpt):
    """Settings for the Haute-Corse prefecture (short code ``pref2b``)."""

    # Prefecture configuration
    hostname = 'https://www.haute-corse.gouv.fr'
    raa_page = f'{hostname}/Publications/Publications-administratives-et-legales/Recueils-des-actes-administratifs'
    full_name = 'Préfecture de Haute-Corse'
    short_code = 'pref2b'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse: year regex on the grey card,
    # month regex on the white card.
    # NOTE(review): both assignments write into dicts reached through the
    # Attrap_prefdpt base class, not attributes of this subclass —
    # presumably only one prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = 'Recueils des actes administratifs ([0-9]{4})'
    Attrap_prefdpt.white_card['regex']['month'] = '([A-Za-zéû]* [0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref30(Attrap_prefdpt):
    """Settings for the Gard prefecture (short code ``pref30``)."""

    # Prefecture configuration
    hostname = 'https://www.gard.gouv.fr'
    raa_page = f'{hostname}/Publications/Recueil-des-Actes-Administratifs'
    full_name = 'Préfecture du Gard'
    short_code = 'pref30'
    timezone = 'Europe/Paris'

    # Configuration of the widgets to analyse.
    # NOTE(review): this writes into a dict reached through the Attrap_prefdpt
    # base class, not an attribute of this subclass — presumably only one
    # prefecture module is loaded per process; confirm.
    Attrap_prefdpt.grey_card['regex']['year'] = '([0-9]{4})'
from Attrap_prefdpt import Attrap_prefdpt
class Attrap_pref31(Attrap_prefdpt):
# Configuration de la préfecture
hostname = 'https://www.haute-garonne.gouv.fr'
raa_page = f'{hostname}/Publications/Recueil-des-Actes-Administratifs/Recueil-des-Actes-Administratifs-Haute-Garonne'
full_name = 'Préfecture de la Haute-Garonne'
short_code = 'pref31'
timezone = 'Europe/Paris'
# Configuration des widgets à analyser
Attrap_prefdpt.grey_card['regex']['month'] = '([A-Za-zéû]* [0-9]{4})'