From b2e0a18b39c65d2f8269d45229d8a9332c9a6a68 Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Mon, 18 Mar 2024 22:50:42 +0100
Subject: [PATCH] =?UTF-8?q?RAAspotter:=20s'assure=20de=20la=20pr=C3=A9senc?=
 =?UTF-8?q?e=20d'un=20lien=20avant=20de=20chercher=20une=20sous-page?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 RAAspotter.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/RAAspotter.py b/RAAspotter.py
index fbd34c7..1e83b18 100644
--- a/RAAspotter.py
+++ b/RAAspotter.py
@@ -124,14 +124,15 @@ class RAAspotter:
     soup = BeautifulSoup(page_content, 'html.parser')
     sub_pages = []
     for a in soup.select(element):
-      url = f"{host}{a['href']}"
-      sub_page_content = self.get_page(url).content
-      if not self.has_pdf(sub_page_content):
-        logger.info(f'{url} ne contient pas de PDF, on récupère ses sous-pages')
-        for sub_sub_page in self.get_sub_pages(sub_page_content, element, host):
-          sub_pages.append(sub_sub_page)
-      else:
-        sub_pages.append(url)
+      if a.get('href'):
+        url = f"{host}{a['href']}"
+        sub_page_content = self.get_page(url).content
+        if not self.has_pdf(sub_page_content):
+          logger.info(f'{url} ne contient pas de PDF, on récupère ses sous-pages')
+          for sub_sub_page in self.get_sub_pages(sub_page_content, element, host):
+            sub_pages.append(sub_sub_page)
+        else:
+          sub_pages.append(url)
     return sub_pages
 
   def get_raa_with_pager(self, pages_list, pager_element, host=""):
-- 
GitLab