from urllib.parse import unquote

from bs4 import BeautifulSoup

from RAAspotter import RAAspotter


class RAAspotter_ppparis(RAAspotter):
    """RAAspotter subclass that extracts PDF links from HTML pages.

    Site-relative links are resolved against
    https://www.prefecturedepolice.interieur.gouv.fr.
    """

    def get_raa(self, page_content):
        """Build one ``RAAspotter.RAA`` per usable PDF link in a page.

        A link is usable when its ``href`` ends with ``.pdf`` and the ``<a>``
        tag contains both a ``<span>`` (read as the name) and a ``<div>``
        with class ``field--type-datetime`` (read as the date).

        Args:
            page_content: HTML markup of the page to scan.

        Returns:
            A list of ``RAAspotter.RAA`` objects, in document order, each
            built from ``(url, date, name, filename)``. ``date`` and ``name``
            are the raw text of their tags; ``filename`` is the URL-decoded
            last path segment of ``url``.
        """
        site_root = 'https://www.prefecturedepolice.interieur.gouv.fr'
        elements = []

        # Load the HTML parser.
        soup = BeautifulSoup(page_content, 'html.parser')

        # Inspect every <a> tag that carries an href and keep only the PDFs.
        for a in soup.find_all('a', href=True):
            href = a['href']
            if not href.endswith('.pdf'):
                continue

            # find() returns None when the child is absent. Skip PDF links
            # that lack the expected markup rather than failing the whole
            # page with an AttributeError.
            name_tag = a.find('span')
            date_tag = a.find('div', class_="field--type-datetime")
            if name_tag is None or date_tag is None:
                continue

            # Site-relative links get the site root prepended; any other
            # href is used as-is.
            url = site_root + href if href.startswith('/') else href

            name = name_tag.get_text()
            date = date_tag.get_text()
            filename = unquote(url.split('/')[-1])

            elements.append(RAAspotter.RAA(url, date, name, filename))

        return elements