diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index df21d603f00e4d998c18ef6f50409f5d6b5d5ebe..f832ac60782dc5b80fd6adcb9a6e1e2970d277f3 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -218,6 +218,11 @@ test_pref93:
     PREF: "pref93"
   extends: .default_pref
 
+test_pref94:
+  variables:
+    PREF: "pref94"
+  extends: .default_pref
+
 test_pref976:
   variables:
     PREF: "pref976"
diff --git a/Attrap_pref94.py b/Attrap_pref94.py
new file mode 100644
index 0000000000000000000000000000000000000000..61cf87510821c1e05c15af81a92d48be83cd24d5
--- /dev/null
+++ b/Attrap_pref94.py
@@ -0,0 +1,75 @@
+import os
+import datetime
+
+from bs4 import BeautifulSoup
+from urllib.parse import unquote
+
+import logging
+from Attrap import Attrap
+
+
+class Attrap_pref94(Attrap):
+    """Scraper for the RAA (recueil des actes administratifs) of the Val-de-Marne prefecture."""
+
+    # Config
+    __HOST = 'https://www.val-de-marne.gouv.fr'
+    __RAA_PAGE = f'{__HOST}/Publications/Publications-legales/RAA-Recueil-des-actes-administratifs'
+    __USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0'
+    full_name = 'Préfecture du Val-de-Marne'
+    short_code = 'pref94'
+
+    def __init__(self, data_dir):
+        """Set up the parent Attrap with this site's user agent, then call enable_tor(10)."""
+        super().__init__(data_dir, self.__USER_AGENT)
+        self.enable_tor(10)
+
+    def get_raa(self, keywords):
+        """Collect RAA entries from the listing's sub-pages and pass them to parse_raa.
+
+        A sub-page is downloaded only when the year Attrap.guess_date extracts
+        from its name (four trailing digits) is not before self.not_before.year.
+        Once parse_raa(elements, keywords) has run, self.mailer() is called.
+        """
+        index_html = self.get_page(self.__RAA_PAGE, 'get').content
+        sub_pages = self.get_sub_pages(
+            index_html,
+            'div.fr-card__body div.fr-card__content h2.fr-card__title a',
+            self.__HOST,
+            False
+        )
+
+        collected = []
+        for sub_page in sub_pages:
+            page_year = Attrap.guess_date(sub_page['name'], '([0-9]{4})$').year
+            if page_year < self.not_before.year:
+                continue
+            sub_page_html = self.get_page(sub_page['url'], 'get').content
+            collected.extend(self.get_raa_elements(sub_page_html))
+
+        self.parse_raa(collected, keywords)
+        self.mailer()
+
+    def get_raa_elements(self, page_content):
+        """Return an Attrap.RAA for every PDF download link found in page_content.
+
+        Only a.fr-link.fr-link--download anchors whose href ends in .pdf are kept.
+        The name is the text just before the anchor's <span>; the date is the last
+        ' - '-separated field of that <span>, parsed as dd/mm/YYYY.
+        """
+        soup = BeautifulSoup(page_content, 'html.parser')
+
+        found = []
+        for link in soup.select('a.fr-link.fr-link--download'):
+            href = link.get('href')
+            if not href or not href.endswith('.pdf'):
+                continue
+
+            # Site-relative links are prefixed with the host
+            url = unquote(f"{self.__HOST}{href}" if href.startswith('/') else href)
+
+            detail = link.find('span')
+            name = detail.previous_sibling.replace('Télécharger ', '').strip()
+            date = datetime.datetime.strptime(detail.get_text().split(' - ')[-1].strip(), '%d/%m/%Y')
+
+            found.append(Attrap.RAA(url, date, name))
+        return found
diff --git a/Makefile b/Makefile
index dbbd7dd24112e11fdd96f53c92f36efc0664617d..079aa9519b1d3ea7e375389e33d3837f2df6ec24 100644
--- a/Makefile
+++ b/Makefile
@@ -49,6 +49,8 @@ pref87:
 	bin/python3 cli.py pref87
 pref93:
 	bin/python3 cli.py pref93
+pref94:
+	bin/python3 cli.py pref94
 pref976:
 	bin/python3 cli.py pref976
 lint:
diff --git a/README.md b/README.md
index 20aaafef03c3d06365237a47d9dd02ba75cb34e9..9d7db731f6d38d7afd66a02f1e31345311c499ec 100644
--- a/README.md
+++ b/README.md
@@ -86,6 +86,7 @@ Les options suivantes peuvent être précisées, par un paramètre si l'utilitai
 - Préfecture du Var (identifiant : `pref83`)
 - Préfecture de la Haute-Vienne (identifiant : `pref87`)
 - Préfecture de Seine-Saint-Denis (identifiant : `pref93`)
+- Préfecture du Val-de-Marne (identifiant : `pref94`)
 - Préfecture de Mayotte (identifiant : `pref976`)
 
 ## Contributions
diff --git a/cli.py b/cli.py
index 275bb16f479a7844884a6ebaed59fd2e71ebaac8..f33159e215724356a93dbd3a9426e5be4e13b074 100755
--- a/cli.py
+++ b/cli.py
@@ -64,6 +64,7 @@ available_administrations = [
     'pref83',
     'pref87',
     'pref93',
+    'pref94',
     'pref976'
 ]
 