From 2382f5c4f074681e2ef641332ec6fc7f902e644c Mon Sep 17 00:00:00 2001 From: Bastien Le Querrec <blq@laquadrature.net> Date: Sun, 14 Apr 2024 19:24:14 +0200 Subject: [PATCH] =?UTF-8?q?RAAspotter:=20l'affichage=20des=20informations?= =?UTF-8?q?=20relatives=20=C3=A0=20l'analyse=20en=20cours=20est=20g=C3=A9r?= =?UTF-8?q?=C3=A9=20par=20RAAspotter=20directement=20et=20non=20par=20les?= =?UTF-8?q?=20classes=20d'administrations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- RAAspotter.py | 5 +++++ RAAspotter_ppparis.py | 4 ---- RAAspotter_pref04.py | 11 ++++------- RAAspotter_pref05.py | 4 ---- RAAspotter_pref06.py | 4 ---- RAAspotter_pref09.py | 11 +++++------ RAAspotter_pref13.py | 10 ++++------ RAAspotter_pref31.py | 4 ---- RAAspotter_pref33.py | 4 ---- RAAspotter_pref34.py | 10 ++++------ RAAspotter_pref35.py | 10 ++++------ RAAspotter_pref38.py | 11 +++++------ RAAspotter_pref42.py | 4 ---- RAAspotter_pref44.py | 4 ---- RAAspotter_pref59.py | 11 +++++------ RAAspotter_pref62.py | 11 +++++------ RAAspotter_pref64.py | 4 ---- RAAspotter_pref65.py | 10 ++++------ RAAspotter_pref66.py | 4 ---- RAAspotter_pref69.py | 4 ---- RAAspotter_pref80.py | 4 ---- RAAspotter_pref81.py | 10 ++++------ RAAspotter_pref83.py | 4 ---- RAAspotter_pref87.py | 4 ---- RAAspotter_pref976.py | 11 +++++------ 25 files changed, 54 insertions(+), 119 deletions(-) diff --git a/RAAspotter.py b/RAAspotter.py index 6bccffb..e42095d 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -148,6 +148,8 @@ class RAAspotter: f.write('') f.close() + self.print_output(str(self.__class__.__name__)) + def configure_mastodon(self, access_token, instance, mastodon_prefix, mastodon_suffix): if access_token and access_token != "" and instance and instance != "": self.mastodon = Mastodon( @@ -496,6 +498,9 @@ class RAAspotter: ) def parse_raa(self, elements, keywords): + self.print_output(f'Termes recherchés: {keywords}') + self.print_output('') + for raa in elements: # Si le fichier n'a pas déjà été parsé et qu'il est postérieur à la # date maximale d'analyse, on le télécharge et on le parse diff --git a/RAAspotter_ppparis.py b/RAAspotter_ppparis.py index 902e327..f3afe30 100644 --- a/RAAspotter_ppparis.py +++ b/RAAspotter_ppparis.py @@ -20,10 +20,6 @@ class RAAspotter_ppparis(RAAspotter): super().__init__(data_dir, self.__USER_AGENT) def get_raa(self, keywords): - self.print_output('RAAspotter_ppparis') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - page_content = self.get_session(self.__RAA_PAGE, self.__WAIT_ELEMENT, 6) raa_elements = self.get_raa_elements(page_content) self.parse_raa(raa_elements, keywords) diff --git a/RAAspotter_pref04.py b/RAAspotter_pref04.py index d73c1bf..eccaea1 100644 --- a/RAAspotter_pref04.py +++ b/RAAspotter_pref04.py @@ -21,11 +21,7 @@ class RAAspotter_pref04(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref04') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - - pages = [] + elements = [] page_content = self.get_page(self.__RAA_PAGE, 'get').content for sub_page in self.get_sub_pages( page_content, @@ -35,9 +31,10 @@ class RAAspotter_pref04(RAAspotter): ): if RAAspotter.guess_date(sub_page['name'], '([0-9]{4}).*').year >= self.not_before.year: sub_page_content = self.get_page(sub_page['url'], 'get').content - raa_elements = self.get_raa_elements(sub_page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(sub_page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref05.py b/RAAspotter_pref05.py index 57c419c..c9f7c60 100644 --- a/RAAspotter_pref05.py +++ b/RAAspotter_pref05.py @@ -21,10 +21,6 @@ class RAAspotter_pref05(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref05') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] # On détermine quelles pages d'année parser diff --git a/RAAspotter_pref06.py b/RAAspotter_pref06.py index 46f05ff..9c3fc13 100644 --- a/RAAspotter_pref06.py +++ b/RAAspotter_pref06.py @@ -52,10 +52,6 @@ class RAAspotter_pref06(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref06') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: for page in self.__RAA_PAGE['2024']: diff --git a/RAAspotter_pref09.py b/RAAspotter_pref09.py index 4c4f68b..8ce0cb0 100644 --- a/RAAspotter_pref09.py +++ b/RAAspotter_pref09.py @@ -21,10 +21,6 @@ class RAAspotter_pref09(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref09') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] # Les RAA de l'Ariège sont éparpillés sur des sous-pages par mois. @@ -45,10 +41,13 @@ class RAAspotter_pref09(RAAspotter): pages_to_parse.append(sub_page['url']) # On parse les pages contenant des RAA + elements = [] for page in pages_to_parse: page_content = self.get_page(page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref13.py b/RAAspotter_pref13.py index 4be6d73..2e0e884 100644 --- a/RAAspotter_pref13.py +++ b/RAAspotter_pref13.py @@ -28,15 +28,13 @@ class RAAspotter_pref13(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref13') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - + elements = [] for raa_page in self.__RAA_PAGE: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref31.py b/RAAspotter_pref31.py index c612bf8..3a2d1bb 100644 --- a/RAAspotter_pref31.py +++ b/RAAspotter_pref31.py @@ -20,10 +20,6 @@ class RAAspotter_pref31(RAAspotter): super().__init__(data_dir, self.__USER_AGENT) def get_raa(self, keywords): - self.print_output('RAAspotter_pref31') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - # On cherche les pages de chaque mois page_content = self.get_page(self.__RAA_PAGE, 'get').content month_pages = self.get_sub_pages( diff --git a/RAAspotter_pref33.py b/RAAspotter_pref33.py index d7d0458..b628028 100644 --- a/RAAspotter_pref33.py +++ b/RAAspotter_pref33.py @@ -25,10 +25,6 @@ class RAAspotter_pref33(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref33') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] # Parfois un RAA est mal catégorisé et se retrouve sur la page racine, donc on la parse diff --git a/RAAspotter_pref34.py b/RAAspotter_pref34.py index 1cf5b86..ff163ed 100644 --- a/RAAspotter_pref34.py +++ b/RAAspotter_pref34.py @@ -28,10 +28,6 @@ class RAAspotter_pref34(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref34') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -46,11 +42,13 @@ class RAAspotter_pref34(RAAspotter): if self.not_before.year <= 2019: pages_to_parse.append(self.__RAA_PAGE['2019']) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref35.py b/RAAspotter_pref35.py index d2f5bda..3192c64 100644 --- a/RAAspotter_pref35.py +++ b/RAAspotter_pref35.py @@ -28,15 +28,13 @@ class RAAspotter_pref35(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref35') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - + elements = [] for raa_page in self.__RAA_PAGE: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref38.py b/RAAspotter_pref38.py index 39e0656..576e1a5 100644 --- a/RAAspotter_pref38.py +++ b/RAAspotter_pref38.py @@ -31,10 +31,6 @@ class RAAspotter_pref38(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref38') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -49,10 +45,13 @@ class RAAspotter_pref38(RAAspotter): if self.not_before.year <= 2019: pages_to_parse.append(self.__RAA_PAGE['2019']) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content, raa_page) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content, raa_page): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content, raa_page): diff --git a/RAAspotter_pref42.py b/RAAspotter_pref42.py index ae1ea7f..ef8b2de 100644 --- a/RAAspotter_pref42.py +++ b/RAAspotter_pref42.py @@ -22,10 +22,6 @@ class RAAspotter_pref42(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref42') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] # On détermine quelles pages d'année parser diff --git a/RAAspotter_pref44.py b/RAAspotter_pref44.py index 96cf356..7839775 100644 --- a/RAAspotter_pref44.py +++ b/RAAspotter_pref44.py @@ -24,10 +24,6 @@ class RAAspotter_pref44(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref44') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] # Parfois un RAA est mal catégorisé et se retrouve sur la page racine, donc on la parse diff --git a/RAAspotter_pref59.py b/RAAspotter_pref59.py index 956aa61..cf2e9fb 100644 --- a/RAAspotter_pref59.py +++ b/RAAspotter_pref59.py @@ -32,10 +32,6 @@ class RAAspotter_pref59(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref59') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -50,6 +46,7 @@ class RAAspotter_pref59(RAAspotter): if self.not_before.year <= 2019: pages_to_parse.append(self.__RAA_PAGE['2019']) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content sub_pages = self.get_sub_pages( @@ -60,8 +57,10 @@ class RAAspotter_pref59(RAAspotter): ) for sub_page in sub_pages[::-1]: sub_page_content = self.get_page(sub_page['url'], 'get').content - sub_raa_elements = self.get_raa_elements(sub_page_content) - self.parse_raa(sub_raa_elements, keywords) + for element in self.get_raa_elements(sub_page_content): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref62.py b/RAAspotter_pref62.py index d542d57..bc1d2f1 100644 --- a/RAAspotter_pref62.py +++ b/RAAspotter_pref62.py @@ -45,10 +45,6 @@ class RAAspotter_pref62(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref62') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: for page in self.__RAA_PAGE['2024']: @@ -69,10 +65,13 @@ class RAAspotter_pref62(RAAspotter): for page in self.__RAA_PAGE['2019']: pages_to_parse.append(page) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref64.py b/RAAspotter_pref64.py index 404d1dc..044d914 100644 --- a/RAAspotter_pref64.py +++ b/RAAspotter_pref64.py @@ -28,10 +28,6 @@ class RAAspotter_pref64(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref64') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] if self.not_before.year <= 2024: year_pages_to_parse.append(self.__RAA_PAGE['2024']) diff --git a/RAAspotter_pref65.py b/RAAspotter_pref65.py index 99d3c99..242997e 100644 --- a/RAAspotter_pref65.py +++ b/RAAspotter_pref65.py @@ -28,10 +28,6 @@ class RAAspotter_pref65(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref65') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -46,11 +42,13 @@ class RAAspotter_pref65(RAAspotter): if self.not_before.year <= 2019: pages_to_parse.append(self.__RAA_PAGE['2019']) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref66.py b/RAAspotter_pref66.py index 3bf4406..0ed6a8d 100644 --- a/RAAspotter_pref66.py +++ b/RAAspotter_pref66.py @@ -32,10 +32,6 @@ class RAAspotter_pref66(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref66') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - elements = [] # La préfecture des Pyrénées-Orientales est une originale : avant 2024, diff --git a/RAAspotter_pref69.py b/RAAspotter_pref69.py index 0d6be9a..ae7e3cf 100644 --- a/RAAspotter_pref69.py +++ b/RAAspotter_pref69.py @@ -28,10 +28,6 @@ class RAAspotter_pref69(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref69') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) diff --git a/RAAspotter_pref80.py b/RAAspotter_pref80.py index 03b4c27..818c25c 100644 --- a/RAAspotter_pref80.py +++ b/RAAspotter_pref80.py @@ -31,10 +31,6 @@ class RAAspotter_pref80(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref80') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] if self.not_before.year <= 2024: year_pages_to_parse.append(self.__RAA_PAGE['2024']) diff --git a/RAAspotter_pref81.py b/RAAspotter_pref81.py index c909a70..5a3d1e2 100644 --- a/RAAspotter_pref81.py +++ b/RAAspotter_pref81.py @@ -29,10 +29,6 @@ class RAAspotter_pref81(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref81') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -74,11 +70,13 @@ class RAAspotter_pref81(RAAspotter): sub_pages_to_parse.append(month_page['url']) # On parse les pages contenant des RAA + elements = [] for page in sub_pages_to_parse: page_content = self.get_page(page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def find_raa_card(self, page, year=None): diff --git a/RAAspotter_pref83.py b/RAAspotter_pref83.py index 39b7c17..ccc2af0 100644 --- a/RAAspotter_pref83.py +++ b/RAAspotter_pref83.py @@ -28,10 +28,6 @@ class RAAspotter_pref83(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref83') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) diff --git a/RAAspotter_pref87.py b/RAAspotter_pref87.py index 4e814b4..3a008f4 100644 --- a/RAAspotter_pref87.py +++ b/RAAspotter_pref87.py @@ -37,10 +37,6 @@ class RAAspotter_pref87(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref87') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] if self.not_before.year <= 2024: for year_page in self.__RAA_PAGE['2024']: diff --git a/RAAspotter_pref976.py b/RAAspotter_pref976.py index cfceb60..4e42957 100644 --- a/RAAspotter_pref976.py +++ b/RAAspotter_pref976.py @@ -29,10 +29,6 @@ class RAAspotter_pref976(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref976') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -74,10 +70,13 @@ class RAAspotter_pref976(RAAspotter): sub_pages_to_parse.append(page_to_parse) # On parse les pages contenant des RAA + elements = [] for page in sub_pages_to_parse: page_content = self.get_page(page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def find_raa_card(self, page, year=None): -- GitLab