From 07741d8f3e18f82cf42667ecc13f609ade4df040 Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Sat, 16 Mar 2024 13:30:53 +0100
Subject: [PATCH] RAAspotter: ajout d'un mailer

---
 RAAspotter.py | 67 +++++++++++++++++++++++++++++++++++++++++++--------
 ppparis.py    | 60 +++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 115 insertions(+), 12 deletions(-)

diff --git a/RAAspotter.py b/RAAspotter.py
index e31532f..e3132ac 100644
--- a/RAAspotter.py
+++ b/RAAspotter.py
@@ -1,4 +1,4 @@
-import os, re
+import os, re, ssl
 import subprocess
 import logging
 import requests
@@ -13,6 +13,8 @@ from pyvirtualdisplay import Display
 from pdfminer.high_level import extract_text
 
 import hashlib
+import smtplib
+from email.message import EmailMessage
 
 logger = logging.getLogger(__name__)
 
@@ -41,9 +43,16 @@ class RAAspotter:
 
   def __init__(self, data_dir, user_agent=""):
     logger.debug('Initialisation de RAAspotter')
-    self.user_agent = user_agent
-    self.session = requests.Session()
-    self.data_dir = data_dir
+
+    self.user_agent       = user_agent
+    self.session          = requests.Session()
+    self.data_dir         = data_dir
+    self.found            = False
+    self.output_file_path = os.path.dirname(os.path.abspath(__file__))+'/output.log'
+
+    f = open(self.output_file_path,'w')
+    f.write('')
+    f.close()
 
   # On démarre le navigateur
   def get_session(self, url, wait_element=""):
@@ -84,12 +93,12 @@ class RAAspotter:
 
     return page_content
 
-  def print_output(data):
+  def print_output(self, data):
     print(data)
     data = data.replace('\033[92m', '')
     data = data.replace('\033[0m', '')
     data = data.replace('\033[1m', '')
-    f = open(os.path.dirname(os.path.abspath(__file__))+'/output.log','a')
+    f = open(self.output_file_path,'a')
     f.write(data+"\n")
     f.close()
 
@@ -98,7 +107,7 @@ class RAAspotter:
       os.makedirs(os.path.dirname(f'{self.data_dir}{raa.get_sha256()}.pdf'), exist_ok=True)
       file = self.session.get(raa.url)
       f = open(f'{self.data_dir}{raa.get_sha256()}.pdf','wb')
-      f.write(file.content);
+      f.write(file.content)
       f.close()
     except (requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError):
       logger.warning(f'ATTENTION: la connexion a été interrompue pendant le téléchargement de {raa.url}, nouvelle tentative...')
@@ -115,9 +124,10 @@ class RAAspotter:
       for keyword in keywords:
         if re.search(keyword, text, re.IGNORECASE|re.MULTILINE):
           if not found:
-            RAAspotter.print_output(f'\033[92m{raa.name}\033[0m ({raa.date})')
+            self.print_output(f'\033[92m{raa.name}\033[0m ({raa.date})')
             found = True
-          RAAspotter.print_output(f'    Le terme \033[1m{keyword}\033[0m a été trouvé.')
+            self.found = True
+          self.print_output(f'    Le terme \033[1m{keyword}\033[0m a été trouvé.')
 
       # Écrit le texte du PDF dans un fichier texte pour une analyse future, puis supprime le PDF
       f = open(f'{self.data_dir}{raa.get_sha256()}.txt','w')
@@ -125,7 +135,7 @@ class RAAspotter:
       f.close()
       os.remove(f'{self.data_dir}{raa.get_sha256()}.pdf')
       if found:
-        RAAspotter.print_output('')
+        self.print_output('')
 
   def ocr(self, raa, retry_on_failure=True):
     cmd = ['ocrmypdf', '-l', 'eng+fra', '--output-type', 'pdfa', '--redo-ocr', '--skip-big', '500' , f'{self.data_dir}{raa.get_sha256()}.pdf', f'{self.data_dir}{raa.get_sha256()}.pdf']
@@ -151,3 +161,40 @@ class RAAspotter:
 
   def get_raa(self, page_content):
     logger.error('Cette fonction doit être surchargée')
+
+  @staticmethod
+  def mailer(smtp_host, smtp_username, smtp_password, smtp_port,
+             smtp_starttls, smtp_ssl, email_from, email_to, email_object,
+             email_content):
+    """Send email_content to every comma-separated address in email_to.
+
+    One message is sent per recipient over its own SMTP session, so
+    recipients do not see each other's addresses. smtp_ssl selects implicit
+    TLS (SMTP_SSL) and wins over smtp_starttls; with neither flag the
+    session is not encrypted. Any failure is logged as a warning and never
+    raised, so a broken mail setup cannot abort the run.
+    """
+    try:
+      context = ssl.create_default_context()
+      for address in email_to.split(','):
+        address = address.strip()
+        if not address:
+          continue
+        # Build a fresh message each time: the default email policy allows
+        # a single 'To' header, so re-assigning it raises ValueError.
+        message = EmailMessage()
+        message.set_content(email_content)
+        message['Subject'] = email_object
+        message['From'] = email_from
+        message['To'] = address
+        if smtp_ssl:
+          smtp = smtplib.SMTP_SSL(smtp_host, smtp_port, context=context)
+        else:
+          smtp = smtplib.SMTP(smtp_host, smtp_port)
+        with smtp:  # sends QUIT and closes the socket even on error
+          if smtp_starttls and not smtp_ssl:
+            smtp.starttls(context=context)
+          smtp.login(smtp_username, smtp_password)
+          smtp.send_message(message)
+    except Exception as exc:
+      logger.warning(f'Impossible d\'envoyer le courrier électronique : {exc}')
diff --git a/ppparis.py b/ppparis.py
index c002c72..43e5580 100755
--- a/ppparis.py
+++ b/ppparis.py
@@ -11,10 +11,35 @@ __USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like
 __headless_mode = True
 __KEYWORDS = os.getenv('KEYWORDS') or 'vidéoprotection,caméras,captation,aéronef'
 __DATA_DIR = os.path.dirname(os.path.abspath(__file__))+'/data/ppparis/'
+__SMTP_HOSTNAME = os.getenv('SMTP_HOSTNAME') or 'localhost'
+__SMTP_USERNAME = os.getenv('SMTP_USERNAME') or ''
+__SMTP_PASSWORD = os.getenv('SMTP_PASSWORD') or ''
+__EMAIL_FROM = os.getenv('EMAIL_FROM')
+__EMAIL_TO = os.getenv('EMAIL_TO')
+if os.getenv('SMTP_PORT'):
+  __SMTP_PORT = int(os.getenv('SMTP_PORT'))
+else:
+  __SMTP_PORT = 587
+if os.getenv('SMTP_STARTTLS'):
+  __SMTP_STARTTLS = True
+else:
+  __SMTP_STARTTLS = False
+if os.getenv('SMTP_SSL'):
+  __SMTP_SSL = True
+else:
+  __SMTP_SSL = False
 
 # Début du script
 parser = argparse.ArgumentParser(prog='ppparis.py', description='Télécharge les RAA de la Préfecture de police de Paris et recherche des mots-clés')
 parser.add_argument('-k', '--keywords', action='store', help='liste des termes recherchés, séparés par une virgule (par défaut : vidéoprotection,caméras,captation,aéronef)')
+parser.add_argument('--smtp-hostname', action='store', help='nom d\'hôte SMTP (par défaut : localhost)')
+parser.add_argument('--smtp-username', action='store', help='nom d\'utilisateur SMTP (par défaut : vide)')
+parser.add_argument('--smtp-password', action='store', help='mot de passe SMTP (par défaut : vide)')
+parser.add_argument('--smtp-port', action='store', help='port SMTP (par défaut : 587)')
+parser.add_argument('--smtp-starttls', action='store_true', help='connexion SMTP avec STARTTLS')
+parser.add_argument('--smtp-ssl', action='store_true', help='connexion SMTP avec SSL')
+parser.add_argument('-f', '--email-from', action='store', help='adresse de courrier électronique expéditrice des notifications')
+parser.add_argument('-t', '--email-to', action='store', help='adresses de courrier électronique destinataires des notifications (séparées par une virgule)')
 parser.add_argument('-v', action='store_true', help='relève le niveau de verbosité à INFO')
 parser.add_argument('-vv', action='store_true', help='relève le niveau de verbosité à DEBUG')
 args = parser.parse_args()
@@ -28,16 +53,47 @@ if args.vv or os.getenv('VVERBOSE'):
 if args.keywords:
   __KEYWORDS = args.keywords
 
+if args.smtp_hostname:
+  __SMTP_HOSTNAME = args.smtp_hostname
+
+if args.smtp_username:
+  __SMTP_USERNAME = args.smtp_username
+
+if args.smtp_password:
+  __SMTP_PASSWORD = args.smtp_password
+
+if args.smtp_port:
+  __SMTP_PORT = int(args.smtp_port)
+
+if args.smtp_starttls:
+  __SMTP_STARTTLS = True
+
+if args.smtp_ssl:
+  __SMTP_SSL = True
+
+if args.email_from:
+  __EMAIL_FROM = args.email_from
+
+if args.email_to:
+  __EMAIL_TO = args.email_to
+
 # On crée le dossier de téléchargement
 os.makedirs(__DATA_DIR, exist_ok=True)
 
 raa_spotter = RAAspotter_ppparis(__DATA_DIR, __USER_AGENT)
 
-RAAspotter_ppparis.print_output('RAAspotter_ppparis')
-RAAspotter_ppparis.print_output(f'Termes recherchés: {__KEYWORDS}')
+raa_spotter.print_output('RAAspotter_ppparis')
+raa_spotter.print_output(f'Termes recherchés: {__KEYWORDS}')
 
 page_content = raa_spotter.get_session(__RAA_PAGE, "block-decree-list-block")
 
 raa_elements = raa_spotter.get_raa(page_content)
 
 raa_spotter.parse_raa(raa_elements, __KEYWORDS.split(','))
+
+# Send the run's report by e-mail only when a keyword matched and every SMTP/e-mail setting is non-empty
+if raa_spotter.found == True and __SMTP_HOSTNAME and __SMTP_USERNAME and __SMTP_PASSWORD and __SMTP_PORT and __EMAIL_TO and __EMAIL_FROM:
+  with open(raa_spotter.output_file_path) as output:
+    RAAspotter.mailer(__SMTP_HOSTNAME, __SMTP_USERNAME, __SMTP_PASSWORD, __SMTP_PORT,
+                      __SMTP_STARTTLS, __SMTP_SSL, __EMAIL_FROM, __EMAIL_TO,
+                      '[RAAspotter] [ppparis] Nouveaux éléments trouvés', output.read())
-- 
GitLab