diff --git a/ppparis.py b/ppparis.py index e8b4faa79aa88d01660eeaf2a710c9e72680a7fc..a67434ec37294fc6621e32d29b509999cfebd946 100755 --- a/ppparis.py +++ b/ppparis.py @@ -158,7 +158,7 @@ for a in soup.find_all('a', href=True): logging.info(f'Nouveau fichier : {name} ({date}). URL : {url}') download_file(url, __DATA_DIR+filename) - cmd = ['ocrmypdf', '-l', 'eng+fra', '--output-type', 'pdfa', '--redo-ocr', __DATA_DIR+filename, __DATA_DIR+filename] + cmd = ['ocrmypdf', '-l', 'eng+fra', '--output-type', 'pdfa', '--redo-ocr', '--skip-big', '500' , __DATA_DIR+filename, __DATA_DIR+filename] logging.debug(f'Lancement de ocrmypdf: {cmd}') try: output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)