From 8f39adb980ba3bc615e17fe1b7617a3486a06cc2 Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Wed, 20 Mar 2024 23:11:08 +0100
Subject: [PATCH] =?UTF-8?q?pref13:=20am=C3=A9liore=20la=20d=C3=A9tection?=
 =?UTF-8?q?=20des=20balises=20a=20et=20valide=20la=20pr=C3=A9sence=20d'un?=
 =?UTF-8?q?=20lien?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 RAAspotter_pref13.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/RAAspotter_pref13.py b/RAAspotter_pref13.py
index d8edba8..0285e12 100644
--- a/RAAspotter_pref13.py
+++ b/RAAspotter_pref13.py
@@ -42,8 +42,8 @@ class RAAspotter_pref13(RAAspotter):
         soup = BeautifulSoup(page_content, 'html.parser')
 
         # Pour chaque balise a, on regarde si c'est un PDF, et si oui on le parse
-        for a in soup.find_all('a', href=True, class_='fr-link--download'):
-            if a['href'].endswith('.pdf'):
+        for a in soup.select('a.fr-link.fr-link--download'):
+            if a.get('href') and a['href'].endswith('.pdf'):
                 if a['href'].startswith('/'):
                     url = f"{self.__HOST}{a['href']}"
                 else:
-- 
GitLab