From 7e2d36523cc8cd96ff105587c77c5a409e50ed4b Mon Sep 17 00:00:00 2001 From: Bastien Le Querrec <blq@laquadrature.net> Date: Wed, 27 Mar 2024 17:21:21 +0100 Subject: [PATCH] =?UTF-8?q?RAAspotter:=20ne=20s'arr=C3=AAte=20pas=20si=20l?= =?UTF-8?q?e=20PDF=20n'est=20pas=20valide?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- RAAspotter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/RAAspotter.py b/RAAspotter.py index e9e06b9..5eddeaf 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -301,7 +301,12 @@ class RAAspotter: if not os.path.isfile(f'{self.data_dir}{raa.get_sha256()}.pdf'): logger.warning(f'ATTENTION: le fichier {raa.get_sha256()}.pdf n\'existe pas') else: - text = extract_text(f'{self.data_dir}{raa.get_sha256()}.pdf') + text = "" + try: + text = extract_text(f'{self.data_dir}{raa.get_sha256()}.pdf') + except Exception as exc: + logger.warning(f'ATTENTION: Impossible d\'extraire le texte du fichier {raa.get_sha256()}.pdf : {exc}') + found = False found_keywords = [] for keyword in keywords: -- GitLab