From f84ac38f66f018ff7e01d7249a4cd3364c922105 Mon Sep 17 00:00:00 2001
From: Joe Seki <joeseki@proton.me>
Date: Sun, 9 Jun 2024 21:53:37 +0200
Subject: [PATCH] =?UTF-8?q?pref73:=20ajout=20de=20la=20pr=C3=A9fecture=20d?=
 =?UTF-8?q?e=20la=20Savoie?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes !13

Co-authored-by: Joe Seki <joeseki@proton.me>
Co-authored-by: Bastien Le Querrec <blq@laquadrature.net>
---
 .gitlab-ci.yml   |  5 ++++
 Attrap_pref73.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++++
 Makefile         |  4 ++-
 README.md        |  1 +
 cli.py           |  1 +
 5 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 Attrap_pref73.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index f832ac6..3f2c4f9 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -193,6 +193,11 @@ test_pref69:
     PREF: "pref69"
   extends: .default_pref
 
+test_pref73:
+  variables:
+    PREF: "pref73"
+  extends: .default_pref
+
 test_pref80:
   variables:
     PREF: "pref80"
diff --git a/Attrap_pref73.py b/Attrap_pref73.py
new file mode 100644
index 0000000..e87c9b8
--- /dev/null
+++ b/Attrap_pref73.py
@@ -0,0 +1,77 @@
+import os
+import datetime
+
+from bs4 import BeautifulSoup
+from urllib.parse import unquote
+
+from Attrap import Attrap
+
+
+class Attrap_pref73(Attrap):
+    """Scraper for the RAA listing pages of the Savoie prefecture website."""
+
+    # Config
+    __HOST = 'https://www.savoie.gouv.fr'
+    # One listing page per publication year, keyed by year, newest first
+    __RAA_PAGE = {
+        '2024': f'{__HOST}/Publications/Recueils-hebdomadaires-et-speciaux-des-actes-administratifs/2024',
+        '2023': f'{__HOST}/Publications/Recueils-hebdomadaires-et-speciaux-des-actes-administratifs/2023',
+        '2022': f'{__HOST}/Publications/Recueils-hebdomadaires-et-speciaux-des-actes-administratifs/2022',
+        '2021': f'{__HOST}/Publications/Recueils-hebdomadaires-et-speciaux-des-actes-administratifs/2021',
+        '2020': f'{__HOST}/Publications/Recueils-hebdomadaires-et-speciaux-des-actes-administratifs/2020',
+        '2019': f'{__HOST}/Publications/Recueils-hebdomadaires-et-speciaux-des-actes-administratifs/2019',
+    }
+    __USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0'
+    full_name = 'Préfecture de la Savoie'
+    short_code = 'pref73'
+
+    def __init__(self, data_dir):
+        """Initialise the parent with this class's user agent, then call ``enable_tor(10)``."""
+        super().__init__(data_dir, self.__USER_AGENT)
+        self.enable_tor(10)
+
+    def get_raa(self, keywords):
+        """Collect the RAA of every year page not earlier than ``self.not_before.year``.
+
+        The collected elements are passed to ``parse_raa`` along with
+        ``keywords``, then ``mailer`` is called.
+        """
+        # Keep each year page whose year is >= not_before.year, in __RAA_PAGE order
+        first_year = self.not_before.year
+        pages_to_parse = [
+            url for year, url in self.__RAA_PAGE.items()
+            if first_year <= int(year)
+        ]
+
+        # Parse every page that lists RAA
+        elements = []
+        for page in pages_to_parse:
+            page_content = self.get_page(page, 'get').content
+            elements.extend(self.get_raa_elements(page_content))
+
+        self.parse_raa(elements, keywords)
+        self.mailer()
+
+    def get_raa_elements(self, page_content):
+        """Return one ``Attrap.RAA`` per PDF download link found in ``page_content``.
+
+        No fallback is applied: a link missing its ``<span>`` or the trailing
+        ``dd/mm/YYYY`` date in that span makes this method raise.
+        """
+        elements = []
+        soup = BeautifulSoup(page_content, 'html.parser')
+
+        for a in soup.select('a.fr-link.fr-link--download'):
+            href = a.get('href')
+            if not href or not href.endswith('.pdf'):
+                continue
+            # Links starting with '/' are prefixed with the prefecture host
+            url = unquote(f'{self.__HOST}{href}' if href.startswith('/') else href)
+            span = a.find('span')
+            # The node right before the span holds the title
+            name = span.previous_sibling.replace('Télécharger ', '').strip()
+            # The date is whatever follows the last ' - ' in the span text
+            date = datetime.datetime.strptime(
+                span.get_text().split(' - ')[-1].strip(), '%d/%m/%Y')
+            elements.append(Attrap.RAA(url, date, name))
+        return elements
diff --git a/Makefile b/Makefile
index 4b512eb..8601dd7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-make: ppparis pref04 pref05 pref06 pref09 pref13 pref31 pref33 pref34 pref35 pref38 pref42 pref44 pref59 pref62 pref63 pref64 pref65 pref66 pref69 pref80 pref81 pref83 pref87 pref93 pref94 pref976
+make: ppparis pref04 pref05 pref06 pref09 pref13 pref31 pref33 pref34 pref35 pref38 pref42 pref44 pref59 pref62 pref63 pref64 pref65 pref66 pref69 pref73 pref80 pref81 pref83 pref87 pref93 pref94 pref976
 ppparis:
 	bin/python3 cli.py ppparis
 pref04:
@@ -39,6 +39,8 @@ pref66:
 	bin/python3 cli.py pref66
 pref69:
 	bin/python3 cli.py pref69
+pref73:
+	bin/python3 cli.py pref73
 pref80:
 	bin/python3 cli.py pref80
 pref81:
diff --git a/README.md b/README.md
index 745b668..40af5ec 100644
--- a/README.md
+++ b/README.md
@@ -81,6 +81,7 @@ Les options suivantes peuvent être précisées, par un paramètre si l'utilitai
 - Préfecture des Hautes-Pyrénées (identifiant : `pref65`)
 - Préfecture des Pyrénées-Orientales (identifiant : `pref66`)
 - Préfecture du Rhône (identifiant : `pref69`)
+- Préfecture de la Savoie (identifiant : `pref73`)
 - Préfecture de la Somme (identifiant : `pref80`)
 - Préfecture du Tarn (identifiant : `pref81`)
 - Préfecture du Var (identifiant : `pref83`)
diff --git a/cli.py b/cli.py
index f33159e..e40a01f 100755
--- a/cli.py
+++ b/cli.py
@@ -59,6 +59,7 @@ available_administrations = [
     'pref65',
     'pref66',
     'pref69',
+    'pref73',
     'pref80',
     'pref81',
     'pref83',
-- 
GitLab