From ddfa365bfd8288c489f3c8e82cf4513a298ca638 Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Mon, 18 Mar 2024 22:50:28 +0100
Subject: [PATCH] RAAspotter: ajout d'un parser de pager

---
 RAAspotter.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/RAAspotter.py b/RAAspotter.py
index cf4bed5..fbd34c7 100644
--- a/RAAspotter.py
+++ b/RAAspotter.py
@@ -134,6 +134,22 @@ class RAAspotter:
         sub_pages.append(url)
     return sub_pages
 
+  def get_raa_with_pager(self, pages_list, pager_element, host=""):
+    """Collect the RAA elements of each page in pages_list, then recurse
+    into the sub-pages that get_sub_pages finds for pager_element."""
+    collected = []
+    for page_url in pages_list:
+      # Fetch the body once; it is scanned for RAA elements and for a pager
+      html = self.get_page(page_url).content
+      collected.extend(self.get_raa_elements(html))
+
+      # Sub-pages found via the pager get the same treatment, recursively
+      pager_pages = self.get_sub_pages(html, pager_element, host)
+      collected.extend(
+        self.get_raa_with_pager(pager_pages, pager_element, host)
+      )
+    return collected
+
   def set_sleep_time(self, sleep_time):
     self.sleep_time = sleep_time
 
-- 
GitLab