Skip to content
Extraits de code Groupes Projets
Valider 4142bd33 rédigé par Hadrien's avatar Hadrien
Parcourir les fichiers

wip: jura

parent 937640e7
Aucune branche associée trouvée
Aucune étiquette associée trouvée
1 requête de fusion : !16 Jura
Ce commit fait partie de la requête de fusion !16. Les commentaires créés ici seront créés dans le contexte de cette requête de fusion.
import os
import datetime
from bs4 import BeautifulSoup
from urllib.parse import unquote
from Attrap import Attrap
class Attrap_pref39(Attrap):
    """Scraper for the "Recueil des Actes Administratifs" (RAA) pages of the Jura prefecture.

    The class lists the yearly index pages of www.jura.gouv.fr, extracts every
    PDF link found on them and hands the resulting RAA entries to the parent
    ``Attrap`` machinery.
    """

    # Config
    __HOST = 'https://www.jura.gouv.fr'
    # One index page per year, newest first. Years 2012-2014 have no entry.
    __RAA_PAGE = {
        '2024': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2024',
        '2023': f'{__HOST}/Publications/Recueils-des-actes-administratifs/Recueil-des-actes-administratifs-2023',
        '2022': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2022',
        '2021': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2021',
        '2020': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2020',
        '2019': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2019',
        '2018': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2018',
        '2017': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2017',
        '2016': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2016',
        '2015': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2015',
        '2011': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2011',
        '2010': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2010',
        '2009': f'{__HOST}/Publications/Publications-legales/Recueil-des-Actes-Administratifs/Annee-2009',
    }
    __USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
    # Literal prefix expected in front of the date-like token of a link label.
    __NAME_PREFIX = 'RAA-39-'
    # Accepted layouts of the date-like token, tried in order. Both expect a
    # literal "0" in front of the two-digit day field.
    __DATE_FORMATS = ('%Y-%m-0%d', '%Y_%m_0%d')
    full_name = 'Préfecture du Jura'
    short_code = 'pref39'

    def __init__(self, data_dir):
        """Initialise the parent scraper with this prefecture's user agent.

        Args:
            data_dir: Forwarded unchanged to ``Attrap.__init__``.
        """
        super().__init__(data_dir, self.__USER_AGENT)
        # NOTE(review): the meaning of the argument (10) is defined by
        # Attrap.enable_tor, which is not visible here - confirm.
        self.enable_tor(10)

    def get_raa(self, keywords):
        """Fetch every relevant yearly page, then parse the RAA entries found.

        A yearly page is fetched when ``self.not_before.year`` is lower than
        or equal to that year.

        Args:
            keywords: Forwarded unchanged to ``self.parse_raa``.
        """
        # The table is declared newest-first and dicts keep insertion order,
        # so pages are visited in the same order as the table.
        pages_to_parse = [
            url
            for year, url in self.__RAA_PAGE.items()
            if self.not_before.year <= int(year)
        ]

        elements = []
        for raa_page in pages_to_parse:
            page_content = self.get_page(raa_page, 'get').content
            elements.extend(self.get_raa_elements(page_content))

        self.parse_raa(elements, keywords)
        self.mailer()

    def _parse_raa_date(self, name):
        """Extract the date encoded at the start of an RAA link label.

        The label is expected to look like ``RAA-39-<token> ...`` where
        ``<token>`` matches one of ``__DATE_FORMATS``.

        Args:
            name: Stripped text of the link.

        Returns:
            The parsed ``datetime.datetime``.

        Raises:
            ValueError: If the token matches none of the known formats.
        """
        token = name
        # Remove the prefix as a prefix. ``str.strip('RAA-39-')`` would treat
        # its argument as a set of characters and also eat trailing "3" and
        # "9" digits of the date itself.
        if token.startswith(self.__NAME_PREFIX):
            token = token[len(self.__NAME_PREFIX):]
        token = token.split(" ")[0].strip('_')

        for date_format in self.__DATE_FORMATS:
            try:
                return datetime.datetime.strptime(token, date_format)
            except ValueError:
                continue
        raise ValueError(f"Unable to parse a date from RAA label {name!r}")

    def get_raa_elements(self, page_content):
        """Build the list of RAA entries linked from one index page.

        Args:
            page_content: HTML of the index page.

        Returns:
            A list of ``Attrap.RAA`` objects, one per PDF link found.

        Raises:
            ValueError: If the label of a PDF link carries no parsable date.
        """
        elements = []
        # Load the parser
        soup = BeautifulSoup(page_content, 'html.parser')

        # Walk through every card link
        for a in soup.select('a.fr-card__link'):
            href = a.get('href')
            if not href or not href.endswith('.pdf'):
                continue

            # Site-relative links are made absolute
            url = f"{self.__HOST}{href}" if href.startswith('/') else href
            url = unquote(url)

            name = a.get_text().strip()
            date = self._parse_raa_date(name)

            elements.append(Attrap.RAA(url, date, name))
        return elements
......@@ -50,6 +50,7 @@ available_administrations = [
'pref34',
'pref35',
'pref38',
'pref39',
'pref42',
'pref44',
'pref59',
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter