From a65949458d54dfedeac8ae827f894e0fb74db7ad Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Sun, 24 Mar 2024 16:01:00 +0100
Subject: [PATCH] =?UTF-8?q?pref04:=20ajout=20de=20la=20pr=C3=A9fecture=20d?=
 =?UTF-8?q?es=20Alpes-de-Haute-Provence?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitlab-ci.yml       |  5 ++++
 Makefile             |  4 +++-
 RAAspotter_pref04.py | 57 ++++++++++++++++++++++++++++++++++++++++++++
 README.md            |  1 +
 cli.py               |  1 +
 5 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 RAAspotter_pref04.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index dccd0eb..c469def 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -86,6 +86,11 @@ test_pref06:
     PREF: "pref06"
   extends: .default_pref
 
+test_pref04:
+  variables:
+    PREF: "pref04"
+  extends: .default_pref
+
 test_pref13:
   variables:
     PREF: "pref13"
diff --git a/Makefile b/Makefile
index a7ce433..82dce5c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,8 @@
-make: ppparis pref06 pref13 pref34 pref35 pref38 pref59 pref62 pref69 pref83 pref976
+make: ppparis pref04 pref06 pref13 pref34 pref35 pref38 pref59 pref62 pref69 pref83 pref976
 ppparis:
 	python cli.py --pref ppparis
+pref04:
+	python cli.py --pref pref04
 pref06:
 	python cli.py --pref pref06
 pref13:
diff --git a/RAAspotter_pref04.py b/RAAspotter_pref04.py
new file mode 100644
index 0000000..118afb5
--- /dev/null
+++ b/RAAspotter_pref04.py
@@ -0,0 +1,57 @@
+import os, sys
+import datetime
+
+from bs4 import BeautifulSoup
+from urllib.parse import unquote
+
+from RAAspotter import RAAspotter
+
+class RAAspotter_pref04(RAAspotter):
+  """RAAspotter scraper for the website of the Préfecture des Alpes-de-Haute-Provence (pref04)."""
+  # Config
+  __HOST       = 'https://www.alpes-de-haute-provence.gouv.fr'
+  __RAA_PAGE   = f'{__HOST}/Publications/Publications-administratives-et-legales/Recueil-des-Actes-Administratifs'
+  __USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0'
+  full_name = 'Préfecture des Alpes-de-Haute-Provence'
+  short_code = 'pref04'
+
+  def __init__(self, data_dir):
+    super().__init__(data_dir, self.__USER_AGENT)
+    self.enable_tor(10)
+
+  def get_raa(self, keywords):
+    """Fetch the RAA index, pass the PDFs of each sub-page dated >= not_before.year to parse_raa() with keywords split on ',', then call mailer()."""
+    self.print_output('RAAspotter_pref04')
+    self.print_output(f'Termes recherchés: {keywords}')
+    self.print_output('')
+
+    page_content = self.get_page(self.__RAA_PAGE, 'get').content
+    for sub_page in self.get_sub_pages(page_content, 'div.fr-card__body div.fr-card__content h2.fr-card__title a', self.__HOST, False):
+      if RAAspotter.guess_date(sub_page['name'], '([0-9]{4}).*').year >= self.not_before.year:
+        sub_page_content = self.get_page(sub_page['url'], 'get').content
+        raa_elements = self.get_raa_elements(sub_page_content)
+        self.parse_raa(raa_elements, keywords.split(','))
+
+    self.mailer()
+
+  def get_raa_elements(self, page_content):
+    """Return a list of RAAspotter.RAA built from the '.pdf' download links (a.fr-link.fr-link--download) found in the HTML page_content."""
+    elements = []
+    soup = BeautifulSoup(page_content, 'html.parser')
+
+    # Walk the download links and keep only those whose href ends with '.pdf'
+    for a in soup.select('a.fr-link.fr-link--download'):
+      href = a.get('href')
+      if href and href.endswith('.pdf'):
+        # Site-relative links are made absolute with the prefecture host before unquoting
+        url      = unquote(f"{self.__HOST}{href}" if href.startswith('/') else href)
+        # NOTE(review): a link without a <span> makes find() return None and raises AttributeError below
+        span     = a.find('span')
+        # Name: node right before the span, 'Télécharger ' removed; date: last ' - ' field of the span text (dd/mm/YYYY)
+        name     = span.previous_sibling.replace('Télécharger ', '').strip()
+        date     = datetime.datetime.strptime(span.get_text().split(' - ')[-1].strip(), '%d/%m/%Y')
+        filename = url.split('/')[-1]
+
+        raa = RAAspotter.RAA(url, date, name, filename)
+        elements.append(raa)
+    return elements
diff --git a/README.md b/README.md
index b1fdb61..44ed9ba 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,7 @@ Il est possible de ne lancer l'analyse que pour une seule administration, avec l
 ## Administrations supportées
 
 - Préfecture de police de Paris (identifiant : `ppparis`)
+- Préfecture des Alpes-de-Haute-Provence  (identifiant : `pref04`)
 - Préfecture des Alpes-Maritimes  (identifiant : `pref06`)
 - Préfecture des Bouches-du-Rhône (identifiant : `pref13`)
 - Préfecture de l'Hérault  (identifiant : `pref34`)
diff --git a/cli.py b/cli.py
index 955ca3d..3cd4b64 100755
--- a/cli.py
+++ b/cli.py
@@ -36,6 +36,7 @@ __MASTODON_INSTANCE = os.getenv('MASTODON_INSTANCE')
 # Liste des préfectures supportées
 available_prefs = [
   'ppparis',
+  'pref04',
   'pref06',
   'pref13',
   'pref34',
-- 
GitLab