From 3ddfe070114e3ce570afc845c27786120fe839be Mon Sep 17 00:00:00 2001 From: Bastien Le Querrec <blq@laquadrature.net> Date: Tue, 7 Jan 2025 01:20:20 +0100 Subject: [PATCH] =?UTF-8?q?prefdpt:=20possibilit=C3=A9=20d'exclure=20des?= =?UTF-8?q?=20titres=20de=20cartes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Attrap_prefdpt.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Attrap_prefdpt.py b/Attrap_prefdpt.py index c83c8dd..2578ada 100644 --- a/Attrap_prefdpt.py +++ b/Attrap_prefdpt.py @@ -21,7 +21,8 @@ class Attrap_prefdpt(Attrap): }, 'link_to_raa': False, 'autodetect_links_to_raa': True, - 'follow_link_on_unrecognised_date': True + 'follow_link_on_unrecognised_date': True, + 'exclude': [] } white_card = { 'regex': { @@ -35,7 +36,8 @@ class Attrap_prefdpt(Attrap): }, 'link_to_raa': False, 'autodetect_links_to_raa': True, - 'follow_link_on_unrecognised_date': True + 'follow_link_on_unrecognised_date': True, + 'exclude': [] } # Liste des widgets à analyser (sera remplie au moment de l'initialisation, mais peut être surchargée par la classe de préfecture de département) @@ -54,13 +56,14 @@ class Attrap_prefdpt(Attrap): class DptWidget: """Une classe représentant un widget sur le site d'une préfecture de département.""" - def __init__(self, name, regex=None, css_path=None, link_to_raa=False, autodetect_links_to_raa=True, follow_link_on_unrecognised_date=True): + def __init__(self, name, regex=None, css_path=None, link_to_raa=False, autodetect_links_to_raa=True, follow_link_on_unrecognised_date=True, exclude=[]): self.name = name self.regex = regex self.css_path = css_path self.link_to_raa = link_to_raa self.autodetect_links_to_raa = autodetect_links_to_raa self.follow_link_on_unrecognised_date = follow_link_on_unrecognised_date + self.exclude = exclude def has_css_path(self, key): return self.css_path and self.css_path.get(key, None) is not None @@ -100,7 +103,8 @@ class Attrap_prefdpt(Attrap): css_path=self.grey_card['css_path'], link_to_raa=self.grey_card['link_to_raa'], autodetect_links_to_raa=self.grey_card['autodetect_links_to_raa'], - follow_link_on_unrecognised_date=self.grey_card['follow_link_on_unrecognised_date'] + follow_link_on_unrecognised_date=self.grey_card['follow_link_on_unrecognised_date'], + exclude=self.grey_card['exclude'] ) ) @@ -112,7 +116,8 @@ class Attrap_prefdpt(Attrap): css_path=self.white_card['css_path'], link_to_raa=self.white_card['link_to_raa'], autodetect_links_to_raa=self.white_card['autodetect_links_to_raa'], - follow_link_on_unrecognised_date=self.white_card['follow_link_on_unrecognised_date'] + follow_link_on_unrecognised_date=self.white_card['follow_link_on_unrecognised_date'], + exclude=self.white_card['exclude'] ) ) @@ -149,7 +154,7 @@ class Attrap_prefdpt(Attrap): False ) for card in cards: - if card['url'] not in self.page_urls_to_parse: + if card['url'] not in self.page_urls_to_parse and card['name'].strip() not in widget.exclude: date = None date_is_correct = False -- GitLab