diff --git a/Attrap.py b/Attrap.py
index 273de9065f2f5b5744626a031d23bfddca42f300..27bc2b74f99fff68df8a9c1ca9b7ad3bdf75cdc3 100644
--- a/Attrap.py
+++ b/Attrap.py
@@ -34,6 +34,8 @@ import email
 
 from mastodon import Mastodon
 
+import ftfy
+
 logger = logging.getLogger(__name__)
 
 
@@ -79,9 +81,10 @@ class Attrap:
             text = ""
 
             reader = PdfReader(f'{raa_data_dir}{self.get_sha256()}.ocr.pdf')
+            ftfy_config = ftfy.TextFixerConfig(unescape_html=False, explain=False)
             for page in reader.pages:
                 try:
-                    text = text + "\n" + page.extract_text()
+                    text = text + "\n" + ftfy.fix_text(page.extract_text(), config=ftfy_config)
                 except Exception as exc:
                     logger.warning(f'ATTENTION: Impossible d\'extraire le texte du fichier {self.get_sha256()}.pdf : {exc}')
 
diff --git a/requirements.txt b/requirements.txt
index 6d62f561749cbb48be57693b5b2afea5079824c0..441591e7b422a421e349f34589a6c2c5c7af9eef 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 beautifulsoup4>=4.12.3
 dateparser>=1.2.0
+ftfy>=6.2.0
 Mastodon.py>=1.8.1
 pycodestyle>=2.11.1
 pypdf>=4.2.0