def get_raa_with_pager(self, pages_list, pager_element, host=""):
    """Collect RAA elements from several pages, following their pagers.

    Every entry of ``pages_list`` is fetched with ``self.get_page``. The
    elements that ``self.get_raa_elements`` extracts from the page content
    are gathered first; then the pages that ``self.get_sub_pages`` returns
    for ``pager_element`` are processed by a recursive call before the next
    entry of ``pages_list`` is handled.

    Args:
        pages_list: Iterable of pages to fetch; each item is handed as-is
            to ``self.get_page`` (presumably URLs -- confirm against
            ``get_page``).
        pager_element: Forwarded unchanged to ``self.get_sub_pages`` to
            find the pager links (presumably a selector -- confirm against
            ``get_sub_pages``).
        host: Forwarded unchanged to ``self.get_sub_pages``. Defaults to
            an empty string.

    Returns:
        A list holding, for each page in order, the elements of that page
        followed by the elements reached through its pager.

    NOTE(review): nothing here remembers which pages were already visited.
    If ``get_sub_pages`` can return a page that was processed earlier
    (e.g. a pager that also links backwards), the recursion never ends --
    confirm that ``pager_element`` only matches forward links.
    """
    collected = []
    for page_url in pages_list:
        content = self.get_page(page_url).content

        # Elements published directly on this page.
        collected.extend(self.get_raa_elements(content))

        # Follow the pager, if any, and gather what the linked pages hold.
        next_pages = self.get_sub_pages(content, pager_element, host)
        collected.extend(
            self.get_raa_with_pager(next_pages, pager_element, host)
        )
    return collected