Skip to content
Extraits de code Groupes Projets

Comparer les révisions

Les modifications sont affichées comme si la révision source était fusionnée avec la révision cible. En savoir plus sur la comparaison des révisions.

Source

Sélectionner le projet cible
No results found

Cible

Sélectionner le projet cible
  • la-quadrature-du-net/Attrap
  • foggyfrog/Attrap
  • skhwiz/Attrap
  • precambrien/Attrap
  • ketsapiwiq/Attrap
  • Joseki/Attrap
  • kr1p/attrap-pref-12
  • kr1p/attrap-pref-46
  • kr1p/attrap-pi
  • Guinness/Attrap
  • astroidgritty/attrap-pref-84
  • davinov/Attrap
  • maettellite/attrap-pref-01
  • m242/Attrap
  • multi/Attrap
  • mverdeil/Attrap
  • olpo/Attrap
17 résultats
Afficher les modifications
Validations sur la source (3)
......@@ -232,3 +232,8 @@ test_pref976:
variables:
PREF: "pref976"
extends: .default_pref
test_pref971:
variables:
PREF: "pref971"
extends: .default_pref
import os
import datetime
from bs4 import BeautifulSoup
from urllib.parse import unquote
from Attrap import Attrap
import locale
locale.setlocale(locale.LC_TIME, "fr_FR.UTF-8")
# https://www.guadeloupe.gouv.fr/Publications/Le-Recueil-des-actes-administratifs/2021/Decembre
class Attrap_pref971(Attrap):
    """Scraper for the administrative acts (RAA) of the Guadeloupe prefecture.

    The site publishes one listing page per year and per month under
    ``/Publications/Le-Recueil-des-actes-administratifs/<year>/<month>``.
    The URLs of those pages are precomputed at class-creation time and
    stored per year in the private ``__RAA_PAGE`` mapping.
    """

    # Month names exactly as they are spelled in the site's URLs
    # (unaccented: "Fevrier", "Aout", "Decembre").
    months_fr = [
        "Janvier",
        "Fevrier",
        "Mars",
        "Avril",
        "Mai",
        "Juin",
        "Juillet",
        "Aout",
        "Septembre",
        "Octobre",
        "Novembre",
        "Decembre",
    ]

    # Config
    __HOST = "https://www.guadeloupe.gouv.fr"

    # One entry per supported year; each list is filled just below with the
    # twelve monthly listing URLs of that year.
    __RAA_PAGE = {
        "2015": [],
        "2016": [],
        "2017": [],
        "2018": [],
        "2019": [],
        "2020": [],
        "2021": [],
        "2022": [],
        "2023": [],
        "2024": [],
    }
    # A plain ``for`` loop is used on purpose: a comprehension nested in a
    # class body cannot see class-level names such as ``__HOST``.
    # Every configured year (2024 included) gets every month (January
    # included); the keys of the dict are the single source of truth.
    # NOTE(review): months that are not published yet will presumably yield
    # a page without any download link -- confirm how get_page handles them.
    for _year in __RAA_PAGE:
        for _month_name in months_fr:
            __RAA_PAGE[_year].append(
                f"{__HOST}/Publications/Le-Recueil-des-actes-administratifs/{_year}/{_month_name}"
            )
    # Do not leave the loop variables behind as class attributes.
    del _year, _month_name

    __USER_AGENT = (
        "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0"
    )
    full_name = "Préfecture de la Guadeloupe"
    short_code = "pref971"

    def __init__(self, data_dir):
        """Initialise the scraper.

        Args:
            data_dir: forwarded unchanged to ``Attrap.__init__``.
        """
        super().__init__(data_dir, self.__USER_AGENT)
        self.enable_tor(10)

    def get_raa(self, keywords):
        """Fetch the monthly listing pages, collect the RAA and process them.

        Only the years greater than or equal to ``self.not_before.year`` are
        fetched, most recent year first.

        Args:
            keywords: forwarded unchanged to ``self.parse_raa``.
        """
        # Select the pages of every configured year that is not older than
        # the ``not_before`` limit (assumed to be set by the base class).
        pages_to_parse = []
        for year in sorted(self.__RAA_PAGE, key=int, reverse=True):
            if self.not_before.year <= int(year):
                pages_to_parse.extend(self.__RAA_PAGE[year])

        # Parse the selected pages that contain RAA.
        elements = []
        for page in pages_to_parse:
            page_content = self.get_page(page, "get").content
            elements.extend(self.get_raa_elements(page_content))

        self.parse_raa(elements, keywords)
        self.mailer()

    def get_raa_elements(self, page_content):
        """Extract the RAA referenced by one listing page.

        Args:
            page_content: HTML of a listing page.

        Returns:
            A list of ``Attrap.RAA`` objects, one per PDF download link.

        Raises:
            IndexError: if a download link contains no ``<span>``.
            ValueError: if the extracted date is not in ``dd/mm/YYYY`` form.
        """
        elements = []
        # Load the parser.
        soup = BeautifulSoup(page_content, "html.parser")

        # Inspect every download link and keep those pointing to a PDF.
        for a in soup.select("a.fr-link.fr-link--download"):
            href = a.get("href")
            if not href or not href.endswith(".pdf"):
                continue

            # Site-relative links are made absolute.
            if href.startswith("/"):
                url = f"{self.__HOST}{href}"
            else:
                url = href
            url = unquote(url)

            name = a.get_text().replace("Télécharger ", "").strip()
            # The date is the text following the last " - " of the link's
            # last <span>.
            date_str = a.find_all("span")[-1].get_text().split(" - ")[-1].strip()
            date = datetime.datetime.strptime(date_str, "%d/%m/%Y")

            elements.append(Attrap.RAA(url, date, name))
        return elements
......@@ -53,6 +53,8 @@ pref93:
bin/python3 cli.py pref93
pref94:
bin/python3 cli.py pref94
pref971:
bin/python3 cli.py pref971
pref976:
bin/python3 cli.py pref976
lint:
......
......@@ -52,6 +52,7 @@ available_administrations = [
'pref38',
'pref42',
'pref44',
'pref45',
'pref59',
'pref62',
'pref63',
......@@ -66,6 +67,7 @@ available_administrations = [
'pref87',
'pref93',
'pref94',
'pref971',
'pref976'
]
......