From 7e2d36523cc8cd96ff105587c77c5a409e50ed4b Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Wed, 27 Mar 2024 17:21:21 +0100
Subject: [PATCH] =?UTF-8?q?RAAspotter:=20ne=20s'arr=C3=AAte=20pas=20si=20l?=
 =?UTF-8?q?e=20PDF=20n'est=20pas=20valide?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 RAAspotter.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/RAAspotter.py b/RAAspotter.py
index e9e06b9..5eddeaf 100644
--- a/RAAspotter.py
+++ b/RAAspotter.py
@@ -301,7 +301,12 @@ class RAAspotter:
     if not os.path.isfile(f'{self.data_dir}{raa.get_sha256()}.pdf'):
       logger.warning(f'ATTENTION: le fichier {raa.get_sha256()}.pdf n\'existe pas')
     else:
-      text = extract_text(f'{self.data_dir}{raa.get_sha256()}.pdf')
+      text = ""
+      try:
+        text = extract_text(f'{self.data_dir}{raa.get_sha256()}.pdf')
+      except Exception as exc:
+        logger.warning(f'ATTENTION: Impossible d\'extraire le texte du fichier {raa.get_sha256()}.pdf : {exc}')
+
       found = False
       found_keywords = []
       for keyword in keywords:
-- 
GitLab