diff --git a/Attrap_pref06.py b/Attrap_pref06.py index 68ad57ac5def0495eeb646c794e4abf909ae33ef..382e21f4dc91718d8af7af2f242fc58eee764bd9 100644 --- a/Attrap_pref06.py +++ b/Attrap_pref06.py @@ -12,4 +12,3 @@ class Attrap_pref06(Attrap_prefdpt): # Configuration des widgets à analyser Attrap_prefdpt.grey_card['regex']['year'] = 'Année *([0-9]{4})' - Attrap_prefdpt.white_card['link_to_raa'] = True diff --git a/Attrap_pref10.py b/Attrap_pref10.py index 689748c7fae14193bbedae2f08c34848537c1be8..08d59eae1e2e2ceb6a960c90eb8fd30a084144e9 100644 --- a/Attrap_pref10.py +++ b/Attrap_pref10.py @@ -16,7 +16,6 @@ class Attrap_pref10(Attrap_prefdpt): # Configuration des widgets à analyser Attrap_prefdpt.grey_card['regex']['year'] = 'RAA *([0-9]{4})' - Attrap_prefdpt.white_card['link_to_raa'] = True # On ajoute un widget custom représentant les liens sur la page d'accueil Attrap_prefdpt.widgets.append( diff --git a/Attrap_prefdpt.py b/Attrap_prefdpt.py index 0e32a047b29631dcf945fd0db3da6ffcabd6e41c..c83c8dded37b6e7821e39d54a32d06325b7f7ab8 100644 --- a/Attrap_prefdpt.py +++ b/Attrap_prefdpt.py @@ -20,6 +20,7 @@ class Attrap_prefdpt(Attrap): 'title': 'div.fr-card.fr-card--sm.fr-card--grey.fr-enlarge-link div.fr-card__body div.fr-card__content h2.fr-card__title a' }, 'link_to_raa': False, + 'autodetect_links_to_raa': True, 'follow_link_on_unrecognised_date': True } white_card = { @@ -33,6 +34,7 @@ class Attrap_prefdpt(Attrap): 'pager': 'ul.fr-pagination__list li a.fr-pagination__link.fr-pagination__link--next.fr-pagination__link--lg-label' # Chemin CSS du pager des cartes blanches }, 'link_to_raa': False, + 'autodetect_links_to_raa': True, 'follow_link_on_unrecognised_date': True } @@ -52,11 +54,12 @@ class Attrap_prefdpt(Attrap): class DptWidget: """Une classe représentant un widget sur le site d'une préfecture de département.""" - def __init__(self, name, regex=None, css_path=None, link_to_raa=False, follow_link_on_unrecognised_date=True): + def __init__(self, name, regex=None, css_path=None, link_to_raa=False, autodetect_links_to_raa=True, follow_link_on_unrecognised_date=True): self.name = name self.regex = regex self.css_path = css_path self.link_to_raa = link_to_raa + self.autodetect_links_to_raa = autodetect_links_to_raa self.follow_link_on_unrecognised_date = follow_link_on_unrecognised_date def has_css_path(self, key): @@ -96,6 +99,7 @@ class Attrap_prefdpt(Attrap): regex=self.grey_card['regex'], css_path=self.grey_card['css_path'], link_to_raa=self.grey_card['link_to_raa'], + autodetect_links_to_raa=self.grey_card['autodetect_links_to_raa'], follow_link_on_unrecognised_date=self.grey_card['follow_link_on_unrecognised_date'] ) ) @@ -107,6 +111,7 @@ class Attrap_prefdpt(Attrap): regex=self.white_card['regex'], css_path=self.white_card['css_path'], link_to_raa=self.white_card['link_to_raa'], + autodetect_links_to_raa=self.white_card['autodetect_links_to_raa'], follow_link_on_unrecognised_date=self.white_card['follow_link_on_unrecognised_date'] ) ) @@ -171,7 +176,7 @@ class Attrap_prefdpt(Attrap): date = datetime.datetime(9999, 1, 1) # Si la configuration indique que les liens renvoient vers un RAA, on ajoute le lien à la liste des éléments - if widget.link_to_raa: + if widget.link_to_raa or (widget.autodetect_links_to_raa and card['url'].endswith('.pdf')): if date.year == 9999: date = None raa = Attrap.RAA(card['url'], date, card['name'].strip(), timezone=self.timezone)