From 5860df1df40d8e0dbd9dbefc79041bd52650fb09 Mon Sep 17 00:00:00 2001
From: precambrien <precambrien@cestcool.net>
Date: Sun, 9 Jun 2024 18:32:46 +0200
Subject: [PATCH] pref94: ajout de pref94 (val-de-marne)

Closes !6
---
 .gitlab-ci.yml   |  5 +++
 Attrap_pref94.py | 82 ++++++++++++++++++++++++++++++++++++++++++++++++
 Makefile         |  2 ++
 README.md        |  1 +
 cli.py           |  1 +
 5 files changed, 91 insertions(+)
 create mode 100644 Attrap_pref94.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index df21d60..f832ac6 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -218,6 +218,11 @@ test_pref93:
     PREF: "pref93"
   extends: .default_pref
 
+test_pref94:
+  variables:
+    PREF: "pref94"
+  extends: .default_pref
+
 test_pref976:
   variables:
     PREF: "pref976"
diff --git a/Attrap_pref94.py b/Attrap_pref94.py
new file mode 100644
index 0000000..61cf875
--- /dev/null
+++ b/Attrap_pref94.py
@@ -0,0 +1,82 @@
+import os
+import datetime
+
+from bs4 import BeautifulSoup
+from urllib.parse import unquote
+
+import logging
+from Attrap import Attrap
+
+
+class Attrap_pref94(Attrap):
+    """Collect the RAA published by the Val-de-Marne prefecture."""
+
+    # Config
+    __HOST = 'https://www.val-de-marne.gouv.fr'
+    __RAA_PAGE = f'{__HOST}/Publications/Publications-legales/RAA-Recueil-des-actes-administratifs'
+    __USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0'
+    full_name = 'Préfecture du Val-de-Marne'
+    short_code = 'pref94'
+
+    def __init__(self, data_dir):
+        """Initialise the parent with the configured user agent, then call enable_tor(10)."""
+        super().__init__(data_dir, self.__USER_AGENT)
+        self.enable_tor(10)
+
+    def get_raa(self, keywords):
+        """Fetch the RAA from every recent enough sub-page and process them.
+
+        Sub-pages linked from the index page are kept when the four-digit
+        year ending their name is not earlier than ``self.not_before``'s
+        year. The collected elements are handed to ``parse_raa`` together
+        with ``keywords``, then ``mailer`` is called.
+        """
+        elements = []
+        page_content = self.get_page(self.__RAA_PAGE, 'get').content
+        sub_pages = self.get_sub_pages(
+            page_content,
+            'div.fr-card__body div.fr-card__content h2.fr-card__title a',
+            self.__HOST,
+            False
+        )
+        for sub_page in sub_pages:
+            year = Attrap.guess_date(sub_page['name'], '([0-9]{4})$').year
+            if year < self.not_before.year:
+                continue
+            sub_page_content = self.get_page(sub_page['url'], 'get').content
+            elements.extend(self.get_raa_elements(sub_page_content))
+
+        self.parse_raa(elements, keywords)
+        self.mailer()
+
+    def get_raa_elements(self, page_content):
+        """Build an Attrap.RAA for each PDF download link of a page.
+
+        Only ``a.fr-link.fr-link--download`` anchors whose ``href`` ends
+        with ``.pdf`` are kept. The name is the text node preceding the
+        anchor's first ``<span>`` with 'Télécharger ' removed; the date is
+        the last ' - '-separated field of that ``<span>``, parsed as
+        ``%d/%m/%Y``.
+        """
+        elements = []
+        # Load the parser
+        soup = BeautifulSoup(page_content, 'html.parser')
+
+        # For each download link, check whether it targets a PDF and, if
+        # so, parse it
+        for a in soup.select('a.fr-link.fr-link--download'):
+            href = a.get('href')
+            if not href or not href.endswith('.pdf'):
+                continue
+
+            # Relative links are resolved against the prefecture's host
+            url = f"{self.__HOST}{href}" if href.startswith('/') else href
+            url = unquote(url)
+
+            # Look the <span> up once: it provides both the name and the date
+            span = a.find('span')
+            name = span.previous_sibling.replace('Télécharger ', '').strip()
+            date = datetime.datetime.strptime(span.get_text().split(' - ')[-1].strip(), '%d/%m/%Y')
+
+            elements.append(Attrap.RAA(url, date, name))
+        return elements
diff --git a/Makefile b/Makefile
index dbbd7dd..079aa95 100644
--- a/Makefile
+++ b/Makefile
@@ -49,6 +49,8 @@ pref87:
 	bin/python3 cli.py pref87
 pref93:
 	bin/python3 cli.py pref93
+pref94:
+	bin/python3 cli.py pref94
 pref976:
 	bin/python3 cli.py pref976
 lint:
diff --git a/README.md b/README.md
index 20aaafe..9d7db73 100644
--- a/README.md
+++ b/README.md
@@ -86,6 +86,7 @@ Les options suivantes peuvent être précisées, par un paramètre si l'utilitai
 - Préfecture du Var (identifiant : `pref83`)
 - Préfecture de la Haute-Vienne (identifiant : `pref87`)
 - Préfecture de Seine-Saint-Denis (identifiant : `pref93`)
+- Préfecture du Val-de-Marne (identifiant : `pref94`)
 - Préfecture de Mayotte (identifiant : `pref976`)
 
 ## Contributions
diff --git a/cli.py b/cli.py
index 275bb16..f33159e 100755
--- a/cli.py
+++ b/cli.py
@@ -64,6 +64,7 @@ available_administrations = [
     'pref83',
     'pref87',
     'pref93',
+    'pref94',
     'pref976'
 ]
 
-- 
GitLab