From cd7d7573fe0230e70b69a5c4fa47dc4227c670e2 Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Thu, 2 Jan 2025 13:26:45 +0100
Subject: [PATCH] =?UTF-8?q?pref75:=20d=C3=A9tecte=20l'URL=20de=20l'ann?=
 =?UTF-8?q?=C3=A9e=20voulue=20automatiquement?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Attrap_pref75.py | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/Attrap_pref75.py b/Attrap_pref75.py
index 58e2fc6..82b4ef5 100644
--- a/Attrap_pref75.py
+++ b/Attrap_pref75.py
@@ -14,16 +14,7 @@ class Attrap_pref75(Attrap):
 
     # Config
     hostname = 'https://www.prefectures-regions.gouv.fr'
-    raa_page = {
-        '2025': f'{hostname}/ile-de-france/ile-de-france/ile-de-france/Documents-publications/Recueil-des-actes-administratifs/Raa-du-departement-de-Paris-2025',
-        '2024': f'{hostname}/ile-de-france/ile-de-france/ile-de-france/Documents-publications/Recueil-des-actes-administratifs/Raa-du-departement-de-Paris-2024',
-        '2023': f'{hostname}/ile-de-france/ile-de-france/ile-de-france/Documents-publications/Recueil-des-actes-administratifs/Raa-du-departement-de-Paris-2023',
-        '2022': f'{hostname}/ile-de-france/ile-de-france/ile-de-france/Documents-publications/Recueil-des-actes-administratifs/Raa-du-departement-de-Paris-2022',
-        '2021': f'{hostname}/ile-de-france/ile-de-france/ile-de-france/Documents-publications/Recueil-des-actes-administratifs/Raa-du-departement-de-Paris-2021',
-        '2020': f'{hostname}/ile-de-france/ile-de-france/ile-de-france/Documents-publications/Recueil-des-actes-administratifs/Raa-du-departement-de-Paris-2020',
-        '2019': f'{hostname}/ile-de-france/ile-de-france/ile-de-france/Documents-publications/Recueil-des-actes-administratifs/Raa-du-departement-de-Paris-2019',
-        '2018': f'{hostname}/ile-de-france/ile-de-france/ile-de-france/Documents-publications/Recueil-des-actes-administratifs/Raa-du-departement-de-Paris-2018'
-    }
+    raa_page = f'{hostname}/ile-de-france/tags/view/Ile-de-France/Documents+et+publications/Recueil+des+actes+administratifs'
     user_agent = 'Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0'
     full_name = 'Préfecture de Paris'
     short_code = 'pref75'
@@ -36,22 +27,18 @@ class Attrap_pref75(Attrap):
     def get_raa(self, keywords):
         year_pages_to_parse = []
 
-        # Les RAA de Paris sont éparpillés sur des sous-pages par mois.
-        # Donc on parse la page principale à la recherche des sous-pages.
-        if self.not_before.year <= 2025:
-            year_pages_to_parse.append(self.raa_page['2025'])
-        if self.not_before.year <= 2024:
-            year_pages_to_parse.append(self.raa_page['2024'])
-        if self.not_before.year <= 2023:
-            year_pages_to_parse.append(self.raa_page['2023'])
-        if self.not_before.year <= 2022:
-            year_pages_to_parse.append(self.raa_page['2022'])
-        if self.not_before.year <= 2021:
-            year_pages_to_parse.append(self.raa_page['2021'])
-        if self.not_before.year <= 2020:
-            year_pages_to_parse.append(self.raa_page['2020'])
-        if self.not_before.year <= 2019:
-            year_pages_to_parse.append(self.raa_page['2019'])
+        # On détermine quelles pages d'année parser
+        page_content = self.get_page(self.raa_page, 'get').content
+        year_pages = self.get_sub_pages(
+            page_content,
+            'article.news-list-item header h2.news-list-title a',
+            self.hostname,
+            False
+        )
+        for year_page in year_pages:
+            year_date = Attrap.guess_date(year_page['name'].strip(), '(?:.*Paris.*)([0-9]{4})').replace(day=1, month=1)
+            if year_date.year >= self.not_before.year and year_date.year < 9999:
+                year_pages_to_parse.append(year_page['url'])
 
         pages_to_parse = []
         for year_page in year_pages_to_parse:
-- 
GitLab