diff --git a/RAAspotter.py b/RAAspotter.py index 06624fb98cf8c162fed58e3fd5d41e22f8cf4864..30611c1b811840b762373513b1e5d1d9b5c8eac5 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -460,31 +460,32 @@ class RAAspotter: writer.write(f'{self.data_dir}/raa/{raa.get_sha256()}.flat.pdf') def search_keywords(self, raa, keywords): - text = open(f'{self.data_dir}/raa/{raa.get_sha256()}.txt').read() - - found = False - found_keywords = [] - for keyword in keywords: - if re.search(keyword, text, re.IGNORECASE | re.MULTILINE): - if not found: - url = quote(raa.url, safe='/:') - self.print_output(f'\033[92m{raa.name}\033[0m ({raa.date_str})') - self.print_output(f'URL : {url}') - found = True - self.found = True - self.print_output(f' Le terme \033[1m{keyword}\033[0m a été trouvé.') - found_keywords.append(keyword) - - if found: - self.print_output('') - url = quote(raa.url, safe='/:') - found_keywords_str = ', '.join( - [str(x) for x in found_keywords] - ) - self.mastodon_toot( - f'{raa.name} ({raa.date_str})\n\nLes termes suivants ont ' - f'été trouvés : {found_keywords_str}.\n\nURL : {url}' - ) + if keywords and not keywords == '': + text = open(f'{self.data_dir}/raa/{raa.get_sha256()}.txt').read() + + found = False + found_keywords = [] + for keyword in keywords.split(','): + if re.search(keyword, text, re.IGNORECASE | re.MULTILINE): + if not found: + url = quote(raa.url, safe='/:') + self.print_output(f'\033[92m{raa.name}\033[0m ({raa.date_str})') + self.print_output(f'URL : {url}') + found = True + self.found = True + self.print_output(f' Le terme \033[1m{keyword}\033[0m a été trouvé.') + found_keywords.append(keyword) + + if found: + self.print_output('') + url = quote(raa.url, safe='/:') + found_keywords_str = ', '.join( + [str(x) for x in found_keywords] + ) + self.mastodon_toot( + f'{raa.name} ({raa.date_str})\n\nLes termes suivants ont ' + f'été trouvés : {found_keywords_str}.\n\nURL : {url}' + ) def parse_raa(self, elements, keywords): for raa in elements: diff --git a/RAAspotter_ppparis.py b/RAAspotter_ppparis.py index c7981d40b13dde3f6fe1c90c7a212451c14c8b28..360e0dd97e6b610d7b185957331679e0623fb1a8 100644 --- a/RAAspotter_ppparis.py +++ b/RAAspotter_ppparis.py @@ -26,7 +26,7 @@ class RAAspotter_ppparis(RAAspotter): page_content = self.get_session(self.__RAA_PAGE, self.__WAIT_ELEMENT, 3) raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref04.py b/RAAspotter_pref04.py index 2a169785f1ba9e46b575a22a72aec0a037fea6d2..d73c1bf66b7236c0713d20e71614222bdfa3bee9 100644 --- a/RAAspotter_pref04.py +++ b/RAAspotter_pref04.py @@ -36,7 +36,7 @@ class RAAspotter_pref04(RAAspotter): if RAAspotter.guess_date(sub_page['name'], '([0-9]{4}).*').year >= self.not_before.year: sub_page_content = self.get_page(sub_page['url'], 'get').content raa_elements = self.get_raa_elements(sub_page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() diff --git a/RAAspotter_pref05.py b/RAAspotter_pref05.py index 1bfa6f87c9ad8c540db5b8e5192023c592841cac..57c419c1247c184547c7a6a088032b3dcda0441d 100644 --- a/RAAspotter_pref05.py +++ b/RAAspotter_pref05.py @@ -78,7 +78,7 @@ class RAAspotter_pref05(RAAspotter): elements.append(element) # On parse les RAA - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref06.py b/RAAspotter_pref06.py index 0210da923886356ee2b67265edb30ad9d337ea6a..46f05ffb2d1095a81ee17a7e8b4400b6e773e670 100644 --- a/RAAspotter_pref06.py +++ b/RAAspotter_pref06.py @@ -81,7 +81,7 @@ class RAAspotter_pref06(RAAspotter): ".fr-pagination__link.fr-pagination__link--next", self.__HOST ) - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref09.py b/RAAspotter_pref09.py index 20b395cadb248afe0bd8bf646238d72b47a5cf55..4c4f68bfbcd8f456dde507884a22f729c7e5c4c2 100644 --- a/RAAspotter_pref09.py +++ b/RAAspotter_pref09.py @@ -48,7 +48,7 @@ class RAAspotter_pref09(RAAspotter): for page in pages_to_parse: page_content = self.get_page(page, 'get').content raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref13.py b/RAAspotter_pref13.py index 17ca16e52a895522a98466a807cc43ac3b8e8c59..4be6d73c1f274c32d8cb73f920a93bfb835ea1f7 100644 --- a/RAAspotter_pref13.py +++ b/RAAspotter_pref13.py @@ -35,7 +35,7 @@ class RAAspotter_pref13(RAAspotter): for raa_page in self.__RAA_PAGE: page_content = self.get_page(raa_page, 'get').content raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() diff --git a/RAAspotter_pref31.py b/RAAspotter_pref31.py index 7e3e72e399cfc09810e838588399be5839b293a8..c612bf817d632f81a7afa99b12f2edb9b6337895 100644 --- a/RAAspotter_pref31.py +++ b/RAAspotter_pref31.py @@ -50,7 +50,7 @@ class RAAspotter_pref31(RAAspotter): ): elements.append(element) - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref33.py b/RAAspotter_pref33.py index e68827c46c5c7ed8b4c4e7483babfe38710617cf..d7d0458f4f5eb0ee9d38f651d9802f47c1fea334 100644 --- a/RAAspotter_pref33.py +++ b/RAAspotter_pref33.py @@ -79,7 +79,7 @@ class RAAspotter_pref33(RAAspotter): self.__HOST )[::-1] - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref34.py b/RAAspotter_pref34.py index d905e4b09e523872bb09dbf5e1b7e40164389553..1cf5b86d4152507c81313f4f2810bc5df9d815e9 100644 --- a/RAAspotter_pref34.py +++ b/RAAspotter_pref34.py @@ -49,7 +49,7 @@ class RAAspotter_pref34(RAAspotter): for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() diff --git a/RAAspotter_pref35.py b/RAAspotter_pref35.py index 469215c040c9b410d3c031f97eb194046a485bf2..d2f5bda68398f14ba7dfbfe919a3c90da1111a5e 100644 --- a/RAAspotter_pref35.py +++ b/RAAspotter_pref35.py @@ -35,7 +35,7 @@ class RAAspotter_pref35(RAAspotter): for raa_page in self.__RAA_PAGE: page_content = self.get_page(raa_page, 'get').content raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() diff --git a/RAAspotter_pref38.py b/RAAspotter_pref38.py index 22e0f9f423cde07aa221856e4bc2feddb2439e59..39e06567fbc80f1533baf819cf731d23af777283 100644 --- a/RAAspotter_pref38.py +++ b/RAAspotter_pref38.py @@ -52,7 +52,7 @@ class RAAspotter_pref38(RAAspotter): for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content raa_elements = self.get_raa_elements(page_content, raa_page) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() def get_raa_elements(self, page_content, raa_page): diff --git a/RAAspotter_pref42.py b/RAAspotter_pref42.py index 7e246295a863b3f0b8a492fc17b853bea7a7ec19..ae1ea7f4e74af7af6c6b13f4360affe70b00f7c8 100644 --- a/RAAspotter_pref42.py +++ b/RAAspotter_pref42.py @@ -57,7 +57,7 @@ class RAAspotter_pref42(RAAspotter): elements.append(element) # On parse les RAA - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref44.py b/RAAspotter_pref44.py index 49ad13d48e3980e316189f8c12fce55a4f2ee1d3..96cf35685ca4285012d70cb481f84528f47ce584 100644 --- a/RAAspotter_pref44.py +++ b/RAAspotter_pref44.py @@ -79,7 +79,7 @@ class RAAspotter_pref44(RAAspotter): self.__HOST )[::-1] - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref59.py b/RAAspotter_pref59.py index e911a7e694ea4313e54c91e33ecef72b98d07933..956aa616c109bda870a8317a479df6bd35b35f5a 100644 --- a/RAAspotter_pref59.py +++ b/RAAspotter_pref59.py @@ -61,7 +61,7 @@ class RAAspotter_pref59(RAAspotter): for sub_page in sub_pages[::-1]: sub_page_content = self.get_page(sub_page['url'], 'get').content sub_raa_elements = self.get_raa_elements(sub_page_content) - self.parse_raa(sub_raa_elements, keywords.split(',')) + self.parse_raa(sub_raa_elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref62.py b/RAAspotter_pref62.py index 3f64ccc7d2f1860286cb82ec74b5df1619a10fde..d542d57f7c8e1b964612b76937bb41acf0bb83eb 100644 --- a/RAAspotter_pref62.py +++ b/RAAspotter_pref62.py @@ -72,7 +72,7 @@ class RAAspotter_pref62(RAAspotter): for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref64.py b/RAAspotter_pref64.py index adacb30e2db194c5f082f1c4bac42e7ec5ecdc2f..404d1dc4f0278ed396cfc27d210053f959219955 100644 --- a/RAAspotter_pref64.py +++ b/RAAspotter_pref64.py @@ -80,7 +80,7 @@ class RAAspotter_pref64(RAAspotter): for raa in self.get_raa_elements(page_content): elements.append(raa) - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref65.py b/RAAspotter_pref65.py index 68278d2571c7f663c94c1f7578aa45363e58ddfb..99d3c99080a805ed143401ca2e13d595bcc0b4d1 100644 --- a/RAAspotter_pref65.py +++ b/RAAspotter_pref65.py @@ -49,7 +49,7 @@ class RAAspotter_pref65(RAAspotter): for raa_page in pages_to_parse: page_content = self.get_page(raa_page, 'get').content raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() diff --git a/RAAspotter_pref66.py b/RAAspotter_pref66.py index 6ffde9bc73137d3a370cf7b46a342e448bea7b0f..3bf4406a4e588cbfbbebbf81c49d866022ee214c 100644 --- a/RAAspotter_pref66.py +++ b/RAAspotter_pref66.py @@ -66,7 +66,7 @@ class RAAspotter_pref66(RAAspotter): for element in self.get_raa_elements_before_2024(self.__RAA_PAGE['2019']): elements.append(element) - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() # On parse un lien d'avant 2024 diff --git a/RAAspotter_pref69.py b/RAAspotter_pref69.py index e28a8313c94f6c0fff70b741f885f26e0dec5cc9..0d6be9ab19650cf37ba0e87bb7072f3ab8d46399 100644 --- a/RAAspotter_pref69.py +++ b/RAAspotter_pref69.py @@ -64,7 +64,7 @@ class RAAspotter_pref69(RAAspotter): for element in self.get_raa_elements(page_content)[::-1]: elements.append(element) - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref80.py b/RAAspotter_pref80.py index b616fd17bcbf99803067911857f22573fb6fdd18..03b4c2772ddb975edcfb163c6db3bc45cc546647 100644 --- a/RAAspotter_pref80.py +++ b/RAAspotter_pref80.py @@ -56,7 +56,7 @@ class RAAspotter_pref80(RAAspotter): for element in self.get_raa_elements(page_content): elements.append(element) - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref81.py b/RAAspotter_pref81.py index 2f4bf7f1305c49203215cde921cdd18075e900b7..c909a70323ae9cfd0f67fe33773c5c8ad53148f0 100644 --- a/RAAspotter_pref81.py +++ b/RAAspotter_pref81.py @@ -77,7 +77,7 @@ class RAAspotter_pref81(RAAspotter): for page in sub_pages_to_parse: page_content = self.get_page(page, 'get').content raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() diff --git a/RAAspotter_pref83.py b/RAAspotter_pref83.py index fe73b4f8ab88cbbadda250f39b13232959011c92..39b7c17230ccb1cb1e663814f04a91cc254a7c3e 100644 --- a/RAAspotter_pref83.py +++ b/RAAspotter_pref83.py @@ -67,7 +67,7 @@ class RAAspotter_pref83(RAAspotter): '.fr-pagination__link.fr-pagination__link--next', self.__HOST ) - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() diff --git a/RAAspotter_pref87.py b/RAAspotter_pref87.py index 6436659d62c581e478996381eb7ff39e6a29c5ec..4e814b41ec9d691f13ea54b5f008d0651e588ad7 100644 --- a/RAAspotter_pref87.py +++ b/RAAspotter_pref87.py @@ -85,7 +85,7 @@ class RAAspotter_pref87(RAAspotter): for raa in self.get_raa_elements(page_content): elements.append(raa) - self.parse_raa(elements, keywords.split(',')) + self.parse_raa(elements, keywords) self.mailer() def get_raa_elements(self, page_content): diff --git a/RAAspotter_pref976.py b/RAAspotter_pref976.py index 5d17bb0ea5cae33ef260d14c5490e7dd677f928d..cfceb60eec946926806cdd7385e5476e1bd71300 100644 --- a/RAAspotter_pref976.py +++ b/RAAspotter_pref976.py @@ -77,7 +77,7 @@ class RAAspotter_pref976(RAAspotter): for page in sub_pages_to_parse: page_content = self.get_page(page, 'get').content raa_elements = self.get_raa_elements(page_content) - self.parse_raa(raa_elements, keywords.split(',')) + self.parse_raa(raa_elements, keywords) self.mailer() def find_raa_card(self, page, year=None): diff --git a/cli.py b/cli.py index 2f959d37cdaa71b77a89f729bb6752f687f5fafc..1b038a108c40e3793eee6773ca19999364de5cec 100755 --- a/cli.py +++ b/cli.py @@ -7,8 +7,7 @@ import importlib from RAAspotter import RAAspotter # Config -__KEYWORDS = os.getenv('KEYWORDS') or \ - 'vidéoprotection,caméras,captation,aéronef' +__KEYWORDS = os.getenv('KEYWORDS') or '' __DATA_DIR_ROOT = os.path.dirname(os.path.abspath(__file__)) + '/data/' __SMTP_HOSTNAME = os.getenv('SMTP_HOSTNAME') or 'localhost' __SMTP_USERNAME = os.getenv('SMTP_USERNAME') or '' @@ -81,7 +80,7 @@ parser.add_argument( '-k', '--keywords', action='store', - help='liste des termes recherchés, séparés par une virgule (par défaut : vidéoprotection,caméras,captation,aéronef)' + help='liste des termes recherchés, séparés par une virgule (aucun par défaut)' ) parser.add_argument( '--not-before',