diff --git a/RAAspotter.py b/RAAspotter.py index e8df3b6a08e9a09d80d39224d8a74882ba923be6..37c24c5d43064a603bd091bfba91b488e7515c45 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -129,11 +129,18 @@ class RAAspotter: for a in soup.select(element): if a.get('href'): url = f"{host}{a['href']}" - sub_page_content = self.get_page(url, 'get').content - if recursive_until_pdf and not self.has_pdf(sub_page_content): - logger.info(f'{url} ne contient pas de PDF, on récupère ses sous-pages') - for sub_sub_page in self.get_sub_pages(sub_page_content, element, host, recursive_until_pdf): - sub_pages.append(sub_sub_page) + if recursive_until_pdf: + sub_page_content = self.get_page(url, 'get').content + if not self.has_pdf(sub_page_content): + logger.info(f'{url} ne contient pas de PDF, on récupère ses sous-pages') + for sub_sub_page in self.get_sub_pages(sub_page_content, element, host, recursive_until_pdf): + sub_pages.append(sub_sub_page) + else: + sub_page = { + 'url': url, + 'name': a.get_text().strip() + } + sub_pages.append(sub_page) else: sub_page = { 'url': url,