diff --git a/RAAspotter.py b/RAAspotter.py index 6bccffbde5f14ae8fe9b8337fe023858a7f6a707..e42095d9a36bdfdf7e3cacba7819f2ff875031cb 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -148,6 +148,8 @@ class RAAspotter: f.write('') f.close() + self.print_output(str(self.__class__.__name__)) + def configure_mastodon(self, access_token, instance, mastodon_prefix, mastodon_suffix): if access_token and access_token != "" and instance and instance != "": self.mastodon = Mastodon( @@ -496,6 +498,9 @@ class RAAspotter: ) def parse_raa(self, elements, keywords): + self.print_output(f'Termes recherchés: {keywords}') + self.print_output('') + for raa in elements: # Si le fichier n'a pas déjà été parsé et qu'il est postérieur à la # date maximale d'analyse, on le télécharge et on le parse diff --git a/RAAspotter_ppparis.py b/RAAspotter_ppparis.py index 902e327cb6c96bdf2d70d05fcf12d529db34e681..f3afe3044b3ad57c6121ead0916138426ba43ce4 100644 --- a/RAAspotter_ppparis.py +++ b/RAAspotter_ppparis.py @@ -20,10 +20,6 @@ class RAAspotter_ppparis(RAAspotter): super().__init__(data_dir, self.__USER_AGENT) def get_raa(self, keywords): - self.print_output('RAAspotter_ppparis') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - page_content = self.get_session(self.__RAA_PAGE, self.__WAIT_ELEMENT, 6) raa_elements = self.get_raa_elements(page_content) self.parse_raa(raa_elements, keywords) diff --git a/RAAspotter_pref04.py b/RAAspotter_pref04.py index d73c1bf66b7236c0713d20e71614222bdfa3bee9..eccaea17945a5c89526db2bdebc66e5dca9dec79 100644 --- a/RAAspotter_pref04.py +++ b/RAAspotter_pref04.py @@ -21,11 +21,7 @@ class RAAspotter_pref04(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref04') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - - pages = [] + elements = [] page_content = self.get_page(self.__RAA_PAGE, 'get').content for sub_page in self.get_sub_pages( page_content, @@ -35,9 +31,10 @@ class RAAspotter_pref04(RAAspotter): ): if RAAspotter.guess_date(sub_page['name'], '([0-9]{4}).*').year >= self.not_before.year: sub_page_content = self.get_page(sub_page['url'], 'get').content - raa_elements = self.get_raa_elements(sub_page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(sub_page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref05.py b/RAAspotter_pref05.py index 57c419c1247c184547c7a6a088032b3dcda0441d..c9f7c60ae945cf21ed05974b6274c1dd842ef76a 100644 --- a/RAAspotter_pref05.py +++ b/RAAspotter_pref05.py @@ -21,10 +21,6 @@ class RAAspotter_pref05(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref05') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] # On détermine quelles pages d'année parser diff --git a/RAAspotter_pref06.py b/RAAspotter_pref06.py index 46f05ffb2d1095a81ee17a7e8b4400b6e773e670..9c3fc13d7f77610827acf9d674c6adb30b8a5288 100644 --- a/RAAspotter_pref06.py +++ b/RAAspotter_pref06.py @@ -52,10 +52,6 @@ class RAAspotter_pref06(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref06') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: for page in self.__RAA_PAGE['2024']: diff --git a/RAAspotter_pref09.py b/RAAspotter_pref09.py index 4c4f68bfbcd8f456dde507884a22f729c7e5c4c2..8ce0cb0e6f44eacc0b8d1935833c86e77f97701f 100644 --- a/RAAspotter_pref09.py +++ b/RAAspotter_pref09.py @@ -21,10 +21,6 @@ class RAAspotter_pref09(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref09') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] # Les RAA de l'Ariège sont éparpillés sur des sous-pages par mois. @@ -45,10 +41,13 @@ class RAAspotter_pref09(RAAspotter): pages_to_parse.append(sub_page['url']) # On parse les pages contenant des RAA + elements = [] for page in pages_to_parse: page_content = self.get_page(page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref13.py b/RAAspotter_pref13.py index 4be6d73c1f274c32d8cb73f920a93bfb835ea1f7..2e0e884f04fd99fa2cf8abe941a2cc3746387919 100644 --- a/RAAspotter_pref13.py +++ b/RAAspotter_pref13.py @@ -28,15 +28,13 @@ class RAAspotter_pref13(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref13') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - + elements = [] for raa_page in self.__RAA_PAGE: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref31.py b/RAAspotter_pref31.py index c612bf817d632f81a7afa99b12f2edb9b6337895..3a2d1bb337648d2f40847844c56b8a708a2bb1dc 100644 --- a/RAAspotter_pref31.py +++ b/RAAspotter_pref31.py @@ -20,10 +20,6 @@ class RAAspotter_pref31(RAAspotter): super().__init__(data_dir, self.__USER_AGENT) def get_raa(self, keywords): - self.print_output('RAAspotter_pref31') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - # On cherche les pages de chaque mois page_content = self.get_page(self.__RAA_PAGE, 'get').content month_pages = self.get_sub_pages( diff --git a/RAAspotter_pref33.py b/RAAspotter_pref33.py index d7d0458f4f5eb0ee9d38f651d9802f47c1fea334..b6280288aeb1b9be89fdbcc6811948150aadb72f 100644 --- a/RAAspotter_pref33.py +++ b/RAAspotter_pref33.py @@ -25,10 +25,6 @@ class RAAspotter_pref33(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref33') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] # Parfois un RAA est mal catégorisé et se retrouve sur la page racine, donc on la parse diff --git a/RAAspotter_pref34.py b/RAAspotter_pref34.py index 1cf5b86d4152507c81313f4f2810bc5df9d815e9..ff163ed1ae549fcef0a2342b6e08101e10571343 100644 --- a/RAAspotter_pref34.py +++ b/RAAspotter_pref34.py @@ -28,10 +28,6 @@ class RAAspotter_pref34(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref34') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -46,11 +42,13 @@ class RAAspotter_pref34(RAAspotter): if self.not_before.year <= 2019: pages_to_parse.append(self.__RAA_PAGE['2019']) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref35.py b/RAAspotter_pref35.py index d2f5bda68398f14ba7dfbfe919a3c90da1111a5e..3192c64e7e2639ff08f5b4cd1ec79ab692b0fc72 100644 --- a/RAAspotter_pref35.py +++ b/RAAspotter_pref35.py @@ -28,15 +28,13 @@ class RAAspotter_pref35(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref35') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - + elements = [] for raa_page in self.__RAA_PAGE: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref38.py b/RAAspotter_pref38.py index 39e06567fbc80f1533baf819cf731d23af777283..576e1a5efc82a38c199233cbaa1efca6237cdbb1 100644 --- a/RAAspotter_pref38.py +++ b/RAAspotter_pref38.py @@ -31,10 +31,6 @@ class RAAspotter_pref38(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref38') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -49,10 +45,13 @@ class RAAspotter_pref38(RAAspotter): if self.not_before.year <= 2019: pages_to_parse.append(self.__RAA_PAGE['2019']) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content, raa_page) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content, raa_page): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content, raa_page): diff --git a/RAAspotter_pref42.py b/RAAspotter_pref42.py index ae1ea7f4e74af7af6c6b13f4360affe70b00f7c8..ef8b2def3a3708e70ea070ca294bd2a878b89863 100644 --- a/RAAspotter_pref42.py +++ b/RAAspotter_pref42.py @@ -22,10 +22,6 @@ class RAAspotter_pref42(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref42') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] # On détermine quelles pages d'année parser diff --git a/RAAspotter_pref44.py b/RAAspotter_pref44.py index 96cf35685ca4285012d70cb481f84528f47ce584..783977559594629e7def7aba1012955423896046 100644 --- a/RAAspotter_pref44.py +++ b/RAAspotter_pref44.py @@ -24,10 +24,6 @@ class RAAspotter_pref44(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref44') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] # Parfois un RAA est mal catégorisé et se retrouve sur la page racine, donc on la parse diff --git a/RAAspotter_pref59.py b/RAAspotter_pref59.py index 956aa616c109bda870a8317a479df6bd35b35f5a..cf2e9fb8f6403690e8ff312bc11a97b8cc963689 100644 --- a/RAAspotter_pref59.py +++ b/RAAspotter_pref59.py @@ -32,10 +32,6 @@ class RAAspotter_pref59(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref59') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -50,6 +46,7 @@ class RAAspotter_pref59(RAAspotter): if self.not_before.year <= 2019: pages_to_parse.append(self.__RAA_PAGE['2019']) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content sub_pages = self.get_sub_pages( @@ -60,8 +57,10 @@ class RAAspotter_pref59(RAAspotter): ) for sub_page in sub_pages[::-1]: sub_page_content = self.get_page(sub_page['url'], 'get').content - sub_raa_elements = self.get_raa_elements(sub_page_content) - self.parse_raa(sub_raa_elements, keywords) + for element in self.get_raa_elements(sub_page_content): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref62.py b/RAAspotter_pref62.py index d542d57f7c8e1b964612b76937bb41acf0bb83eb..bc1d2f18600a92c7dbacb887de1f8e29fda5c7fe 100644 --- a/RAAspotter_pref62.py +++ b/RAAspotter_pref62.py @@ -45,10 +45,6 @@ class RAAspotter_pref62(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref62') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: for page in self.__RAA_PAGE['2024']: @@ -69,10 +65,13 @@ class RAAspotter_pref62(RAAspotter): for page in self.__RAA_PAGE['2019']: pages_to_parse.append(page) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref64.py b/RAAspotter_pref64.py index 404d1dc4f0278ed396cfc27d210053f959219955..044d9145ce8b91d22973184a40bc5a0f281e8cd9 100644 --- a/RAAspotter_pref64.py +++ b/RAAspotter_pref64.py @@ -28,10 +28,6 @@ class RAAspotter_pref64(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref64') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] if self.not_before.year <= 2024: year_pages_to_parse.append(self.__RAA_PAGE['2024']) diff --git a/RAAspotter_pref65.py b/RAAspotter_pref65.py index 99d3c99080a805ed143401ca2e13d595bcc0b4d1..242997e7c866884cc093f6979b695a0146839293 100644 --- a/RAAspotter_pref65.py +++ b/RAAspotter_pref65.py @@ -28,10 +28,6 @@ class RAAspotter_pref65(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref65') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -46,11 +42,13 @@ class RAAspotter_pref65(RAAspotter): if self.not_before.year <= 2019: pages_to_parse.append(self.__RAA_PAGE['2019']) + elements = [] for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref66.py b/RAAspotter_pref66.py index 3bf4406a4e588cbfbbebbf81c49d866022ee214c..0ed6a8d553f0cae89fb288295a384a9ae00f008f 100644 --- a/RAAspotter_pref66.py +++ b/RAAspotter_pref66.py @@ -32,10 +32,6 @@ class RAAspotter_pref66(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref66') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - elements = [] # La préfecture des Pyrénées-Orientales est une originale : avant 2024, diff --git a/RAAspotter_pref69.py b/RAAspotter_pref69.py index 0d6be9ab19650cf37ba0e87bb7072f3ab8d46399..ae7e3cf312191e7fc57b0d34c3599de527752f74 100644 --- a/RAAspotter_pref69.py +++ b/RAAspotter_pref69.py @@ -28,10 +28,6 @@ class RAAspotter_pref69(RAAspotter): self.enable_tor(20) def get_raa(self, keywords): - self.print_output('RAAspotter_pref69') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) diff --git a/RAAspotter_pref80.py b/RAAspotter_pref80.py index 03b4c2772ddb975edcfb163c6db3bc45cc546647..818c25c13f283eab312728e5100e4ea1831008fd 100644 --- a/RAAspotter_pref80.py +++ b/RAAspotter_pref80.py @@ -31,10 +31,6 @@ class RAAspotter_pref80(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref80') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] if self.not_before.year <= 2024: year_pages_to_parse.append(self.__RAA_PAGE['2024']) diff --git a/RAAspotter_pref81.py b/RAAspotter_pref81.py index c909a70323ae9cfd0f67fe33773c5c8ad53148f0..5a3d1e2e3285e72dbe2bbc3843a177680adb9c81 100644 --- a/RAAspotter_pref81.py +++ b/RAAspotter_pref81.py @@ -29,10 +29,6 @@ class RAAspotter_pref81(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref81') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -74,11 +70,13 @@ class RAAspotter_pref81(RAAspotter): sub_pages_to_parse.append(month_page['url']) # On parse les pages contenant des RAA + elements = [] for page in sub_pages_to_parse: page_content = self.get_page(page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + self.parse_raa(elements, keywords) self.mailer() def find_raa_card(self, page, year=None): diff --git a/RAAspotter_pref83.py b/RAAspotter_pref83.py index 39b7c17230ccb1cb1e663814f04a91cc254a7c3e..ccc2af0358246cee75b4b340f67178e804f8f19a 100644 --- a/RAAspotter_pref83.py +++ b/RAAspotter_pref83.py @@ -28,10 +28,6 @@ class RAAspotter_pref83(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref83') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) diff --git a/RAAspotter_pref87.py b/RAAspotter_pref87.py index 4e814b41ec9d691f13ea54b5f008d0651e588ad7..3a008f4a036b9a620df6a29ef2b69e9f80577aa8 100644 --- a/RAAspotter_pref87.py +++ b/RAAspotter_pref87.py @@ -37,10 +37,6 @@ class RAAspotter_pref87(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref87') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - year_pages_to_parse = [] if self.not_before.year <= 2024: for year_page in self.__RAA_PAGE['2024']: diff --git a/RAAspotter_pref976.py b/RAAspotter_pref976.py index cfceb60eec946926806cdd7385e5476e1bd71300..4e42957c98ed1a877beea7c21060e6d68e64d655 100644 --- a/RAAspotter_pref976.py +++ b/RAAspotter_pref976.py @@ -29,10 +29,6 @@ class RAAspotter_pref976(RAAspotter): self.enable_tor(10) def get_raa(self, keywords): - self.print_output('RAAspotter_pref976') - self.print_output(f'Termes recherchés: {keywords}') - self.print_output('') - pages_to_parse = [] if self.not_before.year <= 2024: pages_to_parse.append(self.__RAA_PAGE['2024']) @@ -74,10 +70,13 @@ class RAAspotter_pref976(RAAspotter): sub_pages_to_parse.append(page_to_parse) # On parse les pages contenant des RAA + elements = [] for page in sub_pages_to_parse: page_content = self.get_page(page, 'get').content - raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords) + for element in self.get_raa_elements(page_content): + elements.append(element) + + self.parse_raa(elements, keywords) self.mailer() def find_raa_card(self, page, year=None):