From b8c1a11c76afd8d7a938a5ad01299087065f3bd0 Mon Sep 17 00:00:00 2001 From: Hadrien <ketsapiwiq@protonmail.com> Date: Mon, 10 Jun 2024 11:06:23 +0200 Subject: [PATCH] wip: jura --- Attrap_pref39.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Attrap_pref39.py b/Attrap_pref39.py index 0ccf7f6..0ac6198 100644 --- a/Attrap_pref39.py +++ b/Attrap_pref39.py @@ -79,6 +79,7 @@ class Attrap_pref39(Attrap): # On récupère chaque balise a for a in soup.select("a.fr-card__link"): + if a.get("href") and a["href"].endswith(".pdf"): if a["href"].startswith("/"): url = f"{self.__HOST}{a['href']}" @@ -87,11 +88,11 @@ class Attrap_pref39(Attrap): url = unquote(url) name = a.get_text().strip() - date_str = name.strip("RAA-39-").split(" ")[0] - try: - date = datetime.datetime.strptime(date_str.strip("_"), "%Y-%m-0%d") - except ValueError: - date = datetime.datetime.strptime(date_str.strip("_"), "%Y_%m_0%d") + + date_str = soup.select("p.fr-card__detail").get_text().replace("Publié le ", "") + print(date_str) + date = datetime.datetime.strptime(date_str, "%Y-%m-0%d") + raa = Attrap.RAA(url, date, name) elements.append(raa) return elements -- GitLab