diff --git a/Attrap_pref39.py b/Attrap_pref39.py index 0ccf7f6e49a6275694e6566ad03e12cf78deaae6..0ac61986173f82b77f1746c5b726963107b4be0c 100644 --- a/Attrap_pref39.py +++ b/Attrap_pref39.py @@ -79,6 +79,7 @@ class Attrap_pref39(Attrap): # On récupère chaque balise a for a in soup.select("a.fr-card__link"): + if a.get("href") and a["href"].endswith(".pdf"): if a["href"].startswith("/"): url = f"{self.__HOST}{a['href']}" @@ -87,11 +88,11 @@ class Attrap_pref39(Attrap): url = unquote(url) name = a.get_text().strip() - date_str = name.strip("RAA-39-").split(" ")[0] - try: - date = datetime.datetime.strptime(date_str.strip("_"), "%Y-%m-0%d") - except ValueError: - date = datetime.datetime.strptime(date_str.strip("_"), "%Y_%m_0%d") + + date_str = soup.select("p.fr-card__detail").get_text().replace("Publié le ", "") + print(date_str) + date = datetime.datetime.strptime(date_str, "%Y-%m-0%d") + raa = Attrap.RAA(url, date, name) elements.append(raa) return elements