Skip to content
Extraits de code Groupes Projets
Valider 1ee129c3 rédigé par Hadrien
Parcourir les fichiers

format

parent 9a5db214
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -10,25 +10,25 @@ from Attrap import Attrap
class Attrap_pref39(Attrap):
# Configuration
# NOTE(review): every assignment below appears twice, first single-quoted and
# then double-quoted -- apparently the before/after sides of a formatting-only
# change. Both versions carry identical values; the later one takes effect.
# Base URL of the prefecture website; the RAA page URLs below are built from it.
__HOST = 'https://www.jura.gouv.fr'
__HOST = "https://www.jura.gouv.fr"
# Year (as a string) -> URL of that year's "Recueil des Actes Administratifs" page.
# There are no entries for 2012-2014, and 2023 lives under a different path
# than the other years.
__RAA_PAGE = {
'2024': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2024',
'2023': f'{__HOST}/Publications/Recueils-des-actes-administratifs/Recueil-des-actes-administratifs-2023',
'2022': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2022',
'2021': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2021',
'2020': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2020',
'2019': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2019',
'2018': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2018',
'2017': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2017',
'2016': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2016',
'2015': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2015',
'2011': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2011',
'2010': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2010',
'2009': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2009',
"2024": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2024",
"2023": f"{__HOST}/Publications/Recueils-des-actes-administratifs/Recueil-des-actes-administratifs-2023",
"2022": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2022",
"2021": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2021",
"2020": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2020",
"2019": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2019",
"2018": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2018",
"2017": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2017",
"2016": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2016",
"2015": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2015",
"2011": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2011",
"2010": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2010",
"2009": f"{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2009",
}
# User-Agent string handed to the parent constructor in __init__, followed by
# the display name and short code of this prefecture.
# NOTE(review): how the base class consumes full_name / short_code is not
# visible here -- confirm against Attrap.
__USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
full_name = 'Préfecture du Jura'
short_code = 'pref39'
__USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
full_name = "Préfecture du Jura"
short_code = "pref39"
def __init__(self, data_dir):
super().__init__(data_dir, self.__USER_AGENT)
......@@ -37,35 +37,35 @@ class Attrap_pref39(Attrap):
def get_raa(self, keywords):
pages_to_parse = []
if self.not_before.year <= 2024:
pages_to_parse.append(self.__RAA_PAGE['2024'])
pages_to_parse.append(self.__RAA_PAGE["2024"])
if self.not_before.year <= 2023:
pages_to_parse.append(self.__RAA_PAGE['2023'])
pages_to_parse.append(self.__RAA_PAGE["2023"])
if self.not_before.year <= 2022:
pages_to_parse.append(self.__RAA_PAGE['2022'])
pages_to_parse.append(self.__RAA_PAGE["2022"])
if self.not_before.year <= 2021:
pages_to_parse.append(self.__RAA_PAGE['2021'])
pages_to_parse.append(self.__RAA_PAGE["2021"])
if self.not_before.year <= 2020:
pages_to_parse.append(self.__RAA_PAGE['2020'])
pages_to_parse.append(self.__RAA_PAGE["2020"])
if self.not_before.year <= 2019:
pages_to_parse.append(self.__RAA_PAGE['2019'])
pages_to_parse.append(self.__RAA_PAGE["2019"])
if self.not_before.year <= 2018:
pages_to_parse.append(self.__RAA_PAGE['2018'])
pages_to_parse.append(self.__RAA_PAGE["2018"])
if self.not_before.year <= 2017:
pages_to_parse.append(self.__RAA_PAGE['2017'])
pages_to_parse.append(self.__RAA_PAGE["2017"])
if self.not_before.year <= 2016:
pages_to_parse.append(self.__RAA_PAGE['2016'])
pages_to_parse.append(self.__RAA_PAGE["2016"])
if self.not_before.year <= 2015:
pages_to_parse.append(self.__RAA_PAGE['2015'])
pages_to_parse.append(self.__RAA_PAGE["2015"])
if self.not_before.year <= 2011:
pages_to_parse.append(self.__RAA_PAGE['2011'])
pages_to_parse.append(self.__RAA_PAGE["2011"])
if self.not_before.year <= 2010:
pages_to_parse.append(self.__RAA_PAGE['2010'])
pages_to_parse.append(self.__RAA_PAGE["2010"])
if self.not_before.year <= 2009:
pages_to_parse.append(self.__RAA_PAGE['2009'])
pages_to_parse.append(self.__RAA_PAGE["2009"])
elements = []
for raa_page in pages_to_parse:
page_content = self.get_page(raa_page, 'get').content
page_content = self.get_page(raa_page, "get").content
for element in self.get_raa_elements(page_content):
elements.append(element)
......@@ -75,23 +75,23 @@ class Attrap_pref39(Attrap):
def get_raa_elements(self, page_content):
    """Build the list of RAA entries linked from one listing page.

    Every ``a.fr-card__link`` anchor whose ``href`` ends in ``.pdf`` becomes
    an ``Attrap.RAA(url, date, name)``: the URL is made absolute against
    ``__HOST`` when it starts with ``/`` and is then percent-decoded, the
    name is the anchor's stripped text, and the date is parsed from the
    first space-separated token of that name.

    Args:
        page_content: HTML of the listing page, as accepted by BeautifulSoup.

    Returns:
        A list of ``Attrap.RAA`` objects, in document order.

    Raises:
        ValueError: if the leading token of a link's text matches neither
            ``%Y-%m-0%d`` nor ``%Y_%m_0%d``.
    """
    elements = []
    # Load the parser
    soup = BeautifulSoup(page_content, "html.parser")

    # Go through each matching <a> tag
    for a in soup.select("a.fr-card__link"):
        if a.get("href") and a["href"].endswith(".pdf"):
            if a["href"].startswith("/"):
                url = f"{self.__HOST}{a['href']}"
            else:
                url = a["href"]

            url = unquote(url)
            name = a.get_text().strip()
            # The argument of strip()/lstrip() is a *set of characters*, not
            # a prefix. Stripping it from both ends also ate trailing
            # 3/9/A/R/- characters, so a space-less name such as
            # "RAA-39-2023-01-003" lost its day digits and failed to parse.
            # Only the left side is stripped now; separators left dangling
            # around the date token are removed explicitly instead.
            date_str = name.lstrip("RAA-39-").split(" ")[0].strip("_-")

            # NOTE(review): the literal "0" before %d presumably matches a
            # zero-padded three-digit last field -- confirm against the site.
            try:
                date = datetime.datetime.strptime(date_str, "%Y-%m-0%d")
            except ValueError:
                date = datetime.datetime.strptime(date_str, "%Y_%m_0%d")

            raa = Attrap.RAA(url, date, name)
            elements.append(raa)
    return elements
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter