diff --git a/Attrap_prefdpt.py b/Attrap_prefdpt.py index c83c8dded37b6e7821e39d54a32d06325b7f7ab8..2578ada8f205dd594e91f9a1bb637f8e8b26ec8c 100644 --- a/Attrap_prefdpt.py +++ b/Attrap_prefdpt.py @@ -21,7 +21,8 @@ class Attrap_prefdpt(Attrap): }, 'link_to_raa': False, 'autodetect_links_to_raa': True, - 'follow_link_on_unrecognised_date': True + 'follow_link_on_unrecognised_date': True, + 'exclude': [] } white_card = { 'regex': { @@ -35,7 +36,8 @@ class Attrap_prefdpt(Attrap): }, 'link_to_raa': False, 'autodetect_links_to_raa': True, - 'follow_link_on_unrecognised_date': True + 'follow_link_on_unrecognised_date': True, + 'exclude': [] } # Liste des widgets à analyser (sera remplie au moment de l'initialisation, mais peut être surchargée par la classe de préfecture de département) @@ -54,13 +56,14 @@ class Attrap_prefdpt(Attrap): class DptWidget: """Une classe représentant un widget sur le site d'une préfecture de département.""" - def __init__(self, name, regex=None, css_path=None, link_to_raa=False, autodetect_links_to_raa=True, follow_link_on_unrecognised_date=True): + def __init__(self, name, regex=None, css_path=None, link_to_raa=False, autodetect_links_to_raa=True, follow_link_on_unrecognised_date=True, exclude=None): self.name = name self.regex = regex self.css_path = css_path self.link_to_raa = link_to_raa self.autodetect_links_to_raa = autodetect_links_to_raa self.follow_link_on_unrecognised_date = follow_link_on_unrecognised_date + self.exclude = [] if exclude is None else exclude def has_css_path(self, key): return self.css_path and self.css_path.get(key, None) is not None @@ -100,7 +103,8 @@ class Attrap_prefdpt(Attrap): css_path=self.grey_card['css_path'], link_to_raa=self.grey_card['link_to_raa'], autodetect_links_to_raa=self.grey_card['autodetect_links_to_raa'], - follow_link_on_unrecognised_date=self.grey_card['follow_link_on_unrecognised_date'] + follow_link_on_unrecognised_date=self.grey_card['follow_link_on_unrecognised_date'], + exclude=self.grey_card['exclude'] ) ) @@ -112,7 
+116,8 @@ class Attrap_prefdpt(Attrap): css_path=self.white_card['css_path'], link_to_raa=self.white_card['link_to_raa'], autodetect_links_to_raa=self.white_card['autodetect_links_to_raa'], - follow_link_on_unrecognised_date=self.white_card['follow_link_on_unrecognised_date'] + follow_link_on_unrecognised_date=self.white_card['follow_link_on_unrecognised_date'], + exclude=self.white_card['exclude'] ) ) @@ -149,7 +154,7 @@ class Attrap_prefdpt(Attrap): False ) for card in cards: - if card['url'] not in self.page_urls_to_parse: + if card['url'] not in self.page_urls_to_parse and card['name'].strip() not in widget.exclude: date = None date_is_correct = False