diff --git a/RAAspotter.py b/RAAspotter.py index 3454dcde0eece737425369df722183c5dfce6817..7476b5b51d7cd7f235fb45f5147bbf623f88b5be 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -167,7 +167,7 @@ class RAAspotter: return sub_pages def get_sub_pages_with_pager(self, page, sub_page_element, pager_element, - host): + details_element, host): pages = [] page_content = self.get_page(page, 'get').content @@ -176,13 +176,19 @@ class RAAspotter: # On recherche les sous-pages sub_pages = soup.select(sub_page_element) + sub_pages_details = soup.select(details_element) + i = 0 for sub_page in sub_pages: if sub_page.get('href'): page = { 'url': f"{host}{sub_page['href']}", - 'name': sub_page.get_text().strip() + 'name': sub_page.get_text().strip(), + 'details': '' } + if details_element is not None: + page['details'] = sub_pages_details[i].get_text().strip() pages.append(page) + i = i + 1 # On recherche un pager, et si on le trouve on le suit pager = soup.select(pager_element) @@ -191,6 +197,7 @@ class RAAspotter: f"{host}{pager[0]['href']}", sub_page_element, pager_element, + details_element, host ): pages.append(sub_page) diff --git a/RAAspotter_pref69.py b/RAAspotter_pref69.py index a3c0d8e6c10eb954b0432771349605a3dca09063..f8c5b11a8276ac19396b8fdfb2c8d2db93d9a808 100644 --- a/RAAspotter_pref69.py +++ b/RAAspotter_pref69.py @@ -61,6 +61,7 @@ class RAAspotter_pref69(RAAspotter): 'div.fr-card__body div.fr-card__content ' 'h2.fr-card__title a.fr-card__link', "ul.fr-pagination__list li a.fr-pagination__link--next", + None, self.__HOST)[::-1] for sub_page in sub_pages: sub_pages_to_parse.append(sub_page['url']) diff --git a/RAAspotter_pref976.py b/RAAspotter_pref976.py index f06eea57f20c73a51814175bd6ad6860ccf5ce76..4ba5a56d24f15ed79526bb7e5900d25002f9a605 100644 --- a/RAAspotter_pref976.py +++ b/RAAspotter_pref976.py @@ -98,6 +98,7 @@ class RAAspotter_pref976(RAAspotter): 'a.fr-card__link', 'ul.fr-pagination__list li ' 'a.fr-pagination__link.fr-pagination__link--next', + None, self.__HOST )[::-1] for card_page in card_pages: