From f641ba44e883a192374deb744481458afec420f7 Mon Sep 17 00:00:00 2001 From: Bastien Le Querrec <blq@laquadrature.net> Date: Thu, 4 Apr 2024 00:03:04 +0200 Subject: [PATCH] =?UTF-8?q?RAAspotter:=20ajoute=20la=20possibilit=C3=A9=20?= =?UTF-8?q?de=20r=C3=A9cup=C3=A9rer=20les=20d=C3=A9tails=20d'une=20sous-pa?= =?UTF-8?q?ge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- RAAspotter.py | 11 +++++++++-- RAAspotter_pref69.py | 1 + RAAspotter_pref976.py | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/RAAspotter.py b/RAAspotter.py index 3454dcd..7476b5b 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -167,7 +167,7 @@ class RAAspotter: return sub_pages def get_sub_pages_with_pager(self, page, sub_page_element, pager_element, - host): + details_element, host): pages = [] page_content = self.get_page(page, 'get').content @@ -176,13 +176,19 @@ class RAAspotter: # On recherche les sous-pages sub_pages = soup.select(sub_page_element) + sub_pages_details = soup.select(details_element) + i = 0 for sub_page in sub_pages: if sub_page.get('href'): page = { 'url': f"{host}{sub_page['href']}", - 'name': sub_page.get_text().strip() + 'name': sub_page.get_text().strip(), + 'details': '' } + if details_element is not None: + page['details'] = sub_pages_details[i].get_text().strip() pages.append(page) + i = i + 1 # On recherche un pager, et si on le trouve on le suit pager = soup.select(pager_element) @@ -191,6 +197,7 @@ class RAAspotter: f"{host}{pager[0]['href']}", sub_page_element, pager_element, + details_element, host ): pages.append(sub_page) diff --git a/RAAspotter_pref69.py b/RAAspotter_pref69.py index a3c0d8e..f8c5b11 100644 --- a/RAAspotter_pref69.py +++ b/RAAspotter_pref69.py @@ -61,6 +61,7 @@ class RAAspotter_pref69(RAAspotter): 'div.fr-card__body div.fr-card__content ' 'h2.fr-card__title a.fr-card__link', "ul.fr-pagination__list li a.fr-pagination__link--next", + None, self.__HOST)[::-1] for sub_page in sub_pages: sub_pages_to_parse.append(sub_page['url']) diff --git a/RAAspotter_pref976.py b/RAAspotter_pref976.py index f06eea5..4ba5a56 100644 --- a/RAAspotter_pref976.py +++ b/RAAspotter_pref976.py @@ -98,6 +98,7 @@ class RAAspotter_pref976(RAAspotter): 'a.fr-card__link', 'ul.fr-pagination__list li ' 'a.fr-pagination__link.fr-pagination__link--next', + None, self.__HOST )[::-1] for card_page in card_pages: -- GitLab