From 07741d8f3e18f82cf42667ecc13f609ade4df040 Mon Sep 17 00:00:00 2001 From: Bastien Le Querrec <blq@laquadrature.net> Date: Sat, 16 Mar 2024 13:30:53 +0100 Subject: [PATCH] RAAspotter: ajout d'un mailer --- RAAspotter.py | 67 +++++++++++++++++++++++++++++++++++++++++++-------- ppparis.py | 60 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 12 deletions(-) diff --git a/RAAspotter.py b/RAAspotter.py index e31532f..e3132ac 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -1,4 +1,4 @@ -import os, re +import os, re, ssl import subprocess import logging import requests @@ -13,6 +13,8 @@ from pyvirtualdisplay import Display from pdfminer.high_level import extract_text import hashlib +import smtplib +from email.message import EmailMessage logger = logging.getLogger(__name__) @@ -41,9 +43,16 @@ class RAAspotter: def __init__(self, data_dir, user_agent=""): logger.debug('Initialisation de RAAspotter') - self.user_agent = user_agent - self.session = requests.Session() - self.data_dir = data_dir + + self.user_agent = user_agent + self.session = requests.Session() + self.data_dir = data_dir + self.found = False + self.output_file_path = os.path.dirname(os.path.abspath(__file__))+'/output.log' + + f = open(self.output_file_path,'w') + f.write('') + f.close() # On démarre le navigateur def get_session(self, url, wait_element=""): @@ -84,12 +93,12 @@ class RAAspotter: return page_content - def print_output(data): + def print_output(self, data): print(data) data = data.replace('\033[92m', '') data = data.replace('\033[0m', '') data = data.replace('\033[1m', '') - f = open(os.path.dirname(os.path.abspath(__file__))+'/output.log','a') + f = open(self.output_file_path,'a') f.write(data+"\n") f.close() @@ -98,7 +107,7 @@ class RAAspotter: os.makedirs(os.path.dirname(f'{self.data_dir}{raa.get_sha256()}.pdf'), exist_ok=True) file = self.session.get(raa.url) f = open(f'{self.data_dir}{raa.get_sha256()}.pdf','wb') - f.write(file.content); + f.write(file.content) f.close() except (requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError): logger.warning(f'ATTENTION: la connexion a été interrompue pendant le téléchargement de {raa.url}, nouvelle tentative...') @@ -115,9 +124,10 @@ class RAAspotter: for keyword in keywords: if re.search(keyword, text, re.IGNORECASE|re.MULTILINE): if not found: - RAAspotter.print_output(f'\033[92m{raa.name}\033[0m ({raa.date})') + self.print_output(f'\033[92m{raa.name}\033[0m ({raa.date})') found = True - RAAspotter.print_output(f' Le terme \033[1m{keyword}\033[0m a été trouvé.') + self.found = True + self.print_output(f' Le terme \033[1m{keyword}\033[0m a été trouvé.') # Écrit le texte du PDF dans un fichier texte pour une analyse future, puis supprime le PDF f = open(f'{self.data_dir}{raa.get_sha256()}.txt','w') @@ -125,7 +135,7 @@ class RAAspotter: f.close() os.remove(f'{self.data_dir}{raa.get_sha256()}.pdf') if found: - RAAspotter.print_output('') + self.print_output('') def ocr(self, raa, retry_on_failure=True): cmd = ['ocrmypdf', '-l', 'eng+fra', '--output-type', 'pdfa', '--redo-ocr', '--skip-big', '500' , f'{self.data_dir}{raa.get_sha256()}.pdf', f'{self.data_dir}{raa.get_sha256()}.pdf'] @@ -151,3 +161,40 @@ class RAAspotter: def get_raa(self, page_content): logger.error('Cette fonction doit être surchargée') + + def mailer(smtp_host, smtp_username, smtp_password, smtp_port, + smtp_starttls, smtp_ssl, email_from, email_to, email_object, + email_content): + try: + message = EmailMessage() + message.set_content(email_content) + + message['Subject'] = email_object + message['From'] = email_from + + context = ssl.create_default_context() + + if smtp_ssl == True: + for address in email_to.split(','): + message['To'] = address + smtp = smtplib.SMTP_SSL(smtp_host, port, context=context) + smtp.login(smtp_username, smtp_password) + smtp.send_message(message) + smtp.quit() + elif smtp_starttls == True: + for address in email_to.split(','): + message['To'] = address + smtp = smtplib.SMTP(smtp_host) + smtp.starttls(context=context) + smtp.login(smtp_username, smtp_password) + smtp.send_message(message) + smtp.quit() + else: + for address in email_to.split(','): + message['To'] = address + smtp = smtplib.SMTP(smtp_host) + smtp.login(smtp_username, smtp_password) + smtp.send_message(message) + smtp.quit() + except Exception as exc: + logger.warning(f'Impossible d\'envoyer le courrier électronique : {exc}') diff --git a/ppparis.py b/ppparis.py index c002c72..43e5580 100755 --- a/ppparis.py +++ b/ppparis.py @@ -11,10 +11,35 @@ __USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like __headless_mode = True __KEYWORDS = os.getenv('KEYWORDS') or 'vidéoprotection,caméras,captation,aéronef' __DATA_DIR = os.path.dirname(os.path.abspath(__file__))+'/data/ppparis/' +__SMTP_HOSTNAME = os.getenv('SMTP_HOSTNAME') or 'localhost' +__SMTP_USERNAME = os.getenv('SMTP_USERNAME') or '' +__SMTP_PASSWORD = os.getenv('SMTP_PASSWORD') or '' +__EMAIL_FROM = os.getenv('EMAIL_FROM') +__EMAIL_TO = os.getenv('EMAIL_TO') +if os.getenv('SMTP_PORT'): + __SMTP_PORT = int(os.getenv('SMTP_PORT')) +else: + __SMTP_PORT = 587 +if os.getenv('SMTP_STARTTLS'): + __SMTP_STARTTLS = True +else: + __SMTP_STARTTLS = False +if os.getenv('SMTP_SSL'): + __SMTP_SSL = True +else: + __SMTP_SSL = False # Début du script parser = argparse.ArgumentParser(prog='ppparis.py', description='Télécharge les RAA de la Préfecture de police de Paris et recherche des mots-clés') parser.add_argument('-k', '--keywords', action='store', help='liste des termes recherchés, séparés par une virgule (par défaut : vidéoprotection,caméras,captation,aéronef)') +parser.add_argument('--smtp-hostname', action='store', help='nom d\'hôte SMTP (par défaut : localhost)') +parser.add_argument('--smtp-username', action='store', help='nom d\'utilisateur SMTP (par défaut : vide)') +parser.add_argument('--smtp-password', action='store', help='mot de passe SMTP (par défaut : vide)') +parser.add_argument('--smtp-port', action='store', help='port SMTP (par défaut : 587)') +parser.add_argument('--smtp-starttls', action='store_true', help='connexion SMTP avec STARTTLS') +parser.add_argument('--smtp-ssl', action='store_true', help='connexion SMTP avec SSL') +parser.add_argument('-f', '--email-from', action='store', help='adresse de courrier électronique expéditrice des notifications') +parser.add_argument('-t', '--email-to', action='store', help='adresses de courrier électronique destinataires des notifications (séparées par une virgule)') parser.add_argument('-v', action='store_true', help='relève le niveau de verbosité à INFO') parser.add_argument('-vv', action='store_true', help='relève le niveau de verbosité à DEBUG') args = parser.parse_args() @@ -28,16 +53,47 @@ if args.vv or os.getenv('VVERBOSE'): if args.keywords: __KEYWORDS = args.keywords +if args.smtp_hostname: + __SMTP_HOSTNAME = args.smtp_hostname + +if args.smtp_username: + __SMTP_USERNAME = args.smtp_username + +if args.smtp_password: + __SMTP_PASSWORD = args.smtp_password + +if args.smtp_port: + __SMTP_PORT = int(args.smtp_port) + +if args.smtp_starttls: + __SMTP_STARTTLS = True + +if args.smtp_ssl: + __SMTP_SSL = True + +if args.email_from: + __EMAIL_FROM = args.email_from + +if args.email_to: + __EMAIL_TO = args.email_to + # On crée le dossier de téléchargement os.makedirs(__DATA_DIR, exist_ok=True) raa_spotter = RAAspotter_ppparis(__DATA_DIR, __USER_AGENT) -RAAspotter_ppparis.print_output('RAAspotter_ppparis') -RAAspotter_ppparis.print_output(f'Termes recherchés: {__KEYWORDS}') +raa_spotter.print_output('RAAspotter_ppparis') +raa_spotter.print_output(f'Termes recherchés: {__KEYWORDS}') page_content = raa_spotter.get_session(__RAA_PAGE, "block-decree-list-block") raa_elements = raa_spotter.get_raa(page_content) raa_spotter.parse_raa(raa_elements, __KEYWORDS.split(',')) + +if raa_spotter.found == True and __SMTP_HOSTNAME and __SMTP_USERNAME and __SMTP_PASSWORD and __SMTP_PORT and __EMAIL_TO and __EMAIL_FROM: + output = open(raa_spotter.output_file_path) + RAAspotter.mailer(__SMTP_HOSTNAME, __SMTP_USERNAME, __SMTP_PASSWORD, __SMTP_PORT, + __SMTP_STARTTLS, __SMTP_SSL, __EMAIL_FROM, __EMAIL_TO, + '[RAAspotter] [ppparis] Nouveaux éléments trouvés', + output.read()) -- GitLab