diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 43a71409bbf2521a57cfff810ce605cb8649f783..bdb1946384828a0e3f803ca9927e72fb629db597 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -88,10 +88,12 @@ pep8: - bin/ - lib/ - pyvenv.cfg - - data/${PREF}/*.txt + - data/${PREF}/raa/*.txt + - data/${PREF}/raa/*.json artifacts: paths: - - data/${PREF}/*.txt + - data/${PREF}/raa/*.txt + - data/${PREF}/raa/*.json - output_${PREF}.log expire_in: 2 days rules: diff --git a/RAAspotter.py b/RAAspotter.py index 18294414af111e6a20eb5a7f02013bf2452ed0a8..4a75a3aa7ad5f68f7ccc58cf2b03d4a2fd2b3ed5 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -1,12 +1,13 @@ import os import re import ssl -import sys import subprocess +import shutil import logging import requests import time import datetime +import json from urllib.parse import quote from selenium import webdriver @@ -19,7 +20,10 @@ import dateparser from bs4 import BeautifulSoup from pyvirtualdisplay import Display + from pdfminer.high_level import extract_text +from pdfminer.pdfparser import PDFParser +from pdfminer.pdfdocument import PDFDocument from stem import Signal from stem.control import Controller @@ -39,10 +43,10 @@ class RAAspotter: date = datetime.datetime(1970, 1, 1) date_str = "" name = "" - filename = "" sha256 = "" + pdf_creation_date = None - def __init__(self, url, date, name, filename): + def __init__(self, url, date, name): if not url == "": self.url = url if not date == "": @@ -50,17 +54,77 @@ class RAAspotter: self.date_str = date.strftime("%d/%m/%Y") if not name == "": self.name = name - if not filename == "": - self.filename = filename def get_sha256(self): if (self.sha256 == ""): - self.sha256 = hashlib.sha256(self.filename.encode('utf-8')).hexdigest() + self.sha256 = hashlib.sha256(self.url.encode('utf-8')).hexdigest() return self.sha256 + def get_pdf_creation_date(self, data_dir): + raa_data_dir = f'{data_dir}/raa/' + + try: + p_pdf = open(f'{raa_data_dir}{self.get_sha256()}.pdf', 'rb') + pdf_parser = PDFParser(p_pdf) + pdf_creation_date_raw = PDFDocument(pdf_parser).info[0]['CreationDate'].decode('utf-8').replace('D:', '').replace('\'', '') + if pdf_creation_date_raw: + try: + self.pdf_creation_date = datetime.datetime.strptime(pdf_creation_date_raw, '%Y%m%d%H%M%S%z') + except ValueError as exc: + self.pdf_creation_date = datetime.datetime.strptime(pdf_creation_date_raw, '%Y%m%d%H%M%S') + except Exception as exc: + logger.warning(f'Impossible d\'extraire la date du PDF {self.get_sha256()}.pdf') + + def extract_content(self, data_dir): + raa_data_dir = f'{data_dir}/raa/' + + text = "" + try: + text = extract_text(f'{raa_data_dir}{self.get_sha256()}.ocr.pdf') + except Exception as exc: + logger.warning(f'ATTENTION: Impossible d\'extraire le texte du fichier {self.get_sha256()}.pdf : {exc}') + + # Écrit le texte du PDF dans un fichier texte pour une analyse future + f = open(f'{raa_data_dir}{self.get_sha256()}.txt', 'w') + f.write(text) + f.close() + + # Supprime le PDF d'origine et la version OCRisée + os.remove(f'{raa_data_dir}{self.get_sha256()}.pdf') + os.remove(f'{raa_data_dir}{self.get_sha256()}.ocr.pdf') + + def write_properties(self, data_dir): + raa_data_dir = f'{data_dir}/raa/' + + pdf_creation_date_json = None + if self.pdf_creation_date: + pdf_creation_date_json = self.pdf_creation_date.strftime("%d/%m/%Y %H:%M:%S") + + properties = { + 'name': self.name, + 'date': self.date_str, + 'url': quote(self.url, safe='/:'), + 'first_saw_on': datetime.datetime.today().strftime("%d/%m/%Y %H:%M:%S"), + 'pdf_creation_date': pdf_creation_date_json + } + f = open(f'{raa_data_dir}{self.get_sha256()}.json', 'w') + f.write(json.dumps(properties)) + f.close() + + def parse(self, data_dir, not_before, keywords): + self.get_pdf_creation_date(data_dir) + self.write_properties(data_dir) + self.extract_content(data_dir) + def __init__(self, data_dir, user_agent=''): logger.debug('Initialisation de RAAspotter') + # On crée le dossier de téléchargement + os.makedirs(data_dir, exist_ok=True) + + # pdfminer.six est un peu trop verbeux en mode debug, donc on relève son niveau de log + logging.getLogger("pdfminer").setLevel(logging.WARNING) + self.session = requests.Session() self.data_dir = data_dir self.found = False @@ -340,11 +404,11 @@ class RAAspotter: def download_file(self, raa): try: os.makedirs( - os.path.dirname(f'{self.data_dir}{raa.get_sha256()}.pdf'), + os.path.dirname(f'{self.data_dir}/raa/{raa.get_sha256()}.pdf'), exist_ok=True ) file = self.get_page(raa.url, 'get') - f = open(f'{self.data_dir}{raa.get_sha256()}.pdf', 'wb') + f = open(f'{self.data_dir}/raa/{raa.get_sha256()}.pdf', 'wb') f.write(file.content) f.close() except (requests.exceptions.ConnectionError, @@ -354,48 +418,6 @@ class RAAspotter: except Exception as exc: logger.warning(f'ATTENTION: Impossible de télécharger le fichier {raa.url}: {exc}') - def parse_pdf(self, raa, keywords): - if not os.path.isfile(f'{self.data_dir}{raa.get_sha256()}.pdf'): - logger.warning(f'ATTENTION: le fichier {raa.get_sha256()}.pdf n\'existe pas') - else: - text = "" - try: - # pdfminer.six est un peu trop verbeux en mode debug, donc on relève son niveau de log - logging.getLogger("pdfminer").setLevel(logging.WARNING) - text = extract_text(f'{self.data_dir}{raa.get_sha256()}.pdf') - except Exception as exc: - logger.warning(f'ATTENTION: Impossible d\'extraire le texte du fichier {raa.get_sha256()}.pdf : {exc}') - - found = False - found_keywords = [] - for keyword in keywords: - if re.search(keyword, text, re.IGNORECASE | re.MULTILINE): - if not found: - url = quote(raa.url, safe='/:') - self.print_output(f'\033[92m{raa.name}\033[0m ({raa.date_str})') - self.print_output(f'URL : {url}') - found = True - self.found = True - self.print_output(f' Le terme \033[1m{keyword}\033[0m a été trouvé.') - found_keywords.append(keyword) - - # Écrit le texte du PDF dans un fichier texte pour une analyse - # future, puis supprime le PDF - f = open(f'{self.data_dir}{raa.get_sha256()}.txt', 'w') - f.write(text) - f.close() - os.remove(f'{self.data_dir}{raa.get_sha256()}.pdf') - if found: - self.print_output('') - url = quote(raa.url, safe='/:') - found_keywords_str = ', '.join( - [str(x) for x in found_keywords] - ) - self.mastodon_toot( - f'{raa.name} ({raa.date_str})\n\nLes termes suivants ont ' - f'été trouvés : {found_keywords_str}.\n\nURL : {url}' - ) - def ocr(self, raa, retry_on_failure=True): cmd = [ 'ocrmypdf', @@ -404,8 +426,8 @@ class RAAspotter: '--redo-ocr', '--skip-big', '500', '--invalidate-digital-signatures', - f'{self.data_dir}{raa.get_sha256()}.pdf', - f'{self.data_dir}{raa.get_sha256()}.pdf' + f'{self.data_dir}/raa/{raa.get_sha256()}.pdf', + f'{self.data_dir}/raa/{raa.get_sha256()}.ocr.pdf' ] logger.debug(f'Lancement de ocrmypdf: {cmd}') try: @@ -419,18 +441,46 @@ class RAAspotter: self.ocr(raa, False) elif (not exc.returncode == 6) and (not exc.returncode == 10) and (not exc.returncode == 4): logger.warning('ATTENTION : Impossible d\'OCRiser le document', exc.returncode, exc.output) + shutil.copy(f'{self.data_dir}/raa/{raa.get_sha256()}.pdf', f'{self.data_dir}/raa/{raa.get_sha256()}.ocr.pdf') + + def search_keywords(self, raa, keywords): + text = open(f'{self.data_dir}/raa/{raa.get_sha256()}.txt').read() + + found = False + found_keywords = [] + for keyword in keywords: + if re.search(keyword, text, re.IGNORECASE | re.MULTILINE): + if not found: + url = quote(raa.url, safe='/:') + self.print_output(f'\033[92m{raa.name}\033[0m ({raa.date_str})') + self.print_output(f'URL : {url}') + found = True + self.found = True + self.print_output(f' Le terme \033[1m{keyword}\033[0m a été trouvé.') + found_keywords.append(keyword) + + if found: + self.print_output('') + url = quote(raa.url, safe='/:') + found_keywords_str = ', '.join( + [str(x) for x in found_keywords] + ) + self.mastodon_toot( + f'{raa.name} ({raa.date_str})\n\nLes termes suivants ont ' + f'été trouvés : {found_keywords_str}.\n\nURL : {url}' + ) def parse_raa(self, elements, keywords): for raa in elements: # Si le fichier n'a pas déjà été parsé et qu'il est postérieur à la # date maximale d'analyse, on le télécharge et on le parse - if raa.date >= self.not_before and \ - not os.path.isfile(f'{self.data_dir}{raa.get_sha256()}.txt'): + if raa.date >= self.not_before and not os.path.isfile(f'{self.data_dir}/raa/{raa.get_sha256()}.txt'): url = quote(raa.url, safe='/:') logger.info(f'Nouveau fichier : {raa.name} ({raa.date_str}). URL : {url}') self.download_file(raa) self.ocr(raa, True) - self.parse_pdf(raa, keywords) + raa.parse(self.data_dir, self.not_before, keywords) + self.search_keywords(raa, keywords) def get_raa(self, page_content): logger.error('Cette fonction doit être surchargée') diff --git a/RAAspotter_ppparis.py b/RAAspotter_ppparis.py index 4a867a4cedf0182fc0c1d6364dd5ac4dec45a979..c7981d40b13dde3f6fe1c90c7a212451c14c8b28 100644 --- a/RAAspotter_ppparis.py +++ b/RAAspotter_ppparis.py @@ -46,8 +46,7 @@ class RAAspotter_ppparis(RAAspotter): url = unquote(url) name = a.find('span').get_text() date = datetime.datetime.strptime(a.find('div', class_="field--type-datetime").get_text().strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref04.py b/RAAspotter_pref04.py index f79fa5e9e8924df318718dc96a0aa442e142fae6..2a169785f1ba9e46b575a22a72aec0a037fea6d2 100644 --- a/RAAspotter_pref04.py +++ b/RAAspotter_pref04.py @@ -57,8 +57,7 @@ class RAAspotter_pref04(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref05.py b/RAAspotter_pref05.py index c29a2a7a0902378586b779298c235740353841ba..1bfa6f87c9ad8c540db5b8e5192023c592841cac 100644 --- a/RAAspotter_pref05.py +++ b/RAAspotter_pref05.py @@ -97,8 +97,7 @@ class RAAspotter_pref05(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref06.py b/RAAspotter_pref06.py index 75f2d4e565897ca36965c4fe267b9a1e31c2afda..0210da923886356ee2b67265edb30ad9d337ea6a 100644 --- a/RAAspotter_pref06.py +++ b/RAAspotter_pref06.py @@ -103,8 +103,7 @@ class RAAspotter_pref06(RAAspotter): url = unquote(url) name = a.get_text().strip() date = datetime.datetime.strptime(card.find('p', class_='fr-card__detail').get_text().replace('Publié le ', '').strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref09.py b/RAAspotter_pref09.py index 51c93316708ed6a159f0328568ec4371b4e9635a..20b395cadb248afe0bd8bf646238d72b47a5cf55 100644 --- a/RAAspotter_pref09.py +++ b/RAAspotter_pref09.py @@ -67,8 +67,7 @@ class RAAspotter_pref09(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref13.py b/RAAspotter_pref13.py index e66d4a6643aac64c9f68a43ca792d8b704ddada9..17ca16e52a895522a98466a807cc43ac3b8e8c59 100644 --- a/RAAspotter_pref13.py +++ b/RAAspotter_pref13.py @@ -55,8 +55,7 @@ class RAAspotter_pref13(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref31.py b/RAAspotter_pref31.py index cff6ade4277e3b8aded83cb84528b4fa8a8a7dfe..7e3e72e399cfc09810e838588399be5839b293a8 100644 --- a/RAAspotter_pref31.py +++ b/RAAspotter_pref31.py @@ -69,8 +69,7 @@ class RAAspotter_pref31(RAAspotter): url = unquote(url) name = a.get_text().strip().capitalize() date = datetime.datetime.strptime(a['title'].split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref34.py b/RAAspotter_pref34.py index 06c1e1d96765090e8e142191444f74d7bc9228c5..d905e4b09e523872bb09dbf5e1b7e40164389553 100644 --- a/RAAspotter_pref34.py +++ b/RAAspotter_pref34.py @@ -69,8 +69,7 @@ class RAAspotter_pref34(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref35.py b/RAAspotter_pref35.py index b9542b63e5fd5469ee61801a4817f018cbb6a237..469215c040c9b410d3c031f97eb194046a485bf2 100644 --- a/RAAspotter_pref35.py +++ b/RAAspotter_pref35.py @@ -56,8 +56,7 @@ class RAAspotter_pref35(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref38.py b/RAAspotter_pref38.py index 58727dda342614d14f4b0c9a951322a55554fff8..22e0f9f423cde07aa221856e4bc2feddb2439e59 100644 --- a/RAAspotter_pref38.py +++ b/RAAspotter_pref38.py @@ -96,8 +96,7 @@ class RAAspotter_pref38(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref42.py b/RAAspotter_pref42.py index 254f560f50302680bf32d2458a26813de1accb1e..7e246295a863b3f0b8a492fc17b853bea7a7ec19 100644 --- a/RAAspotter_pref42.py +++ b/RAAspotter_pref42.py @@ -76,8 +76,7 @@ class RAAspotter_pref42(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref59.py b/RAAspotter_pref59.py index ebbe460354f3f8930e4caa69495deeb16da63441..e911a7e694ea4313e54c91e33ecef72b98d07933 100644 --- a/RAAspotter_pref59.py +++ b/RAAspotter_pref59.py @@ -80,8 +80,7 @@ class RAAspotter_pref59(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref62.py b/RAAspotter_pref62.py index 75b909c9df0ffdc7e5416510285b86d3d2f7bf71..3f64ccc7d2f1860286cb82ec74b5df1619a10fde 100644 --- a/RAAspotter_pref62.py +++ b/RAAspotter_pref62.py @@ -93,8 +93,7 @@ class RAAspotter_pref62(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements[::-1] diff --git a/RAAspotter_pref64.py b/RAAspotter_pref64.py index 803b0436c707fde078c42d08d91b4e7bf14a15ba..adacb30e2db194c5f082f1c4bac42e7ec5ecdc2f 100644 --- a/RAAspotter_pref64.py +++ b/RAAspotter_pref64.py @@ -99,8 +99,7 @@ class RAAspotter_pref64(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref65.py b/RAAspotter_pref65.py index 880c8a9f68e1d32b2a825884401f0d8814100ddf..68278d2571c7f663c94c1f7578aa45363e58ddfb 100644 --- a/RAAspotter_pref65.py +++ b/RAAspotter_pref65.py @@ -69,8 +69,7 @@ class RAAspotter_pref65(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref66.py b/RAAspotter_pref66.py index a2f4277dc95f1a5154ccfc729ee3ce10465212ec..6ffde9bc73137d3a370cf7b46a342e448bea7b0f 100644 --- a/RAAspotter_pref66.py +++ b/RAAspotter_pref66.py @@ -103,9 +103,8 @@ class RAAspotter_pref66(RAAspotter): name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() else: name = a.get_text().replace('Télécharger ', '').strip() - filename = url.split('/')[-1] - elements.append(RAAspotter.RAA(url, date, name, filename)) + elements.append(RAAspotter.RAA(url, date, name)) return elements # On parse les RAA depuis 2024 @@ -133,7 +132,6 @@ class RAAspotter_pref66(RAAspotter): url = unquote(url) name = page['name'].replace('Télécharger ', '').strip() date = datetime.datetime.strptime(page['details'].replace('Publié le ', '').strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - elements.append(RAAspotter.RAA(url, date, name, filename)) + elements.append(RAAspotter.RAA(url, date, name)) return elements diff --git a/RAAspotter_pref69.py b/RAAspotter_pref69.py index 3c7c9bf61dd0459dc746e15f55c0daa6aa4f4a45..e28a8313c94f6c0fff70b741f885f26e0dec5cc9 100644 --- a/RAAspotter_pref69.py +++ b/RAAspotter_pref69.py @@ -83,8 +83,7 @@ class RAAspotter_pref69(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref80.py b/RAAspotter_pref80.py index 8118d0c0f7247bf824fd0f4948ad1dde20e086bb..b616fd17bcbf99803067911857f22573fb6fdd18 100644 --- a/RAAspotter_pref80.py +++ b/RAAspotter_pref80.py @@ -77,7 +77,6 @@ class RAAspotter_pref80(RAAspotter): # On enlève les espaces insécables, les double-espaces, et le texte « Télécharger » de certains liens name = a.get_text().replace('Télécharger ', '').strip().replace(u"\u00A0", ' ').replace(' ', ' ') if name and not name == '': - filename = url.split('/')[-1] # Certains RAA de la Somme ont une ligne avec les détails du fichier. Si cette ligne # est disponible, on la parse, sinon on devine la date à partir du nom date = None @@ -98,6 +97,6 @@ class RAAspotter_pref80(RAAspotter): if date.year == 9999: logger.warning(f'On ignore {name} (URL : {url})') else: - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements[::-1] diff --git a/RAAspotter_pref81.py b/RAAspotter_pref81.py index ca4052b1e39fefedb20a053fa19191b76e6624f4..2f4bf7f1305c49203215cde921cdd18075e900b7 100644 --- a/RAAspotter_pref81.py +++ b/RAAspotter_pref81.py @@ -113,8 +113,7 @@ class RAAspotter_pref81(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref83.py b/RAAspotter_pref83.py index 8e523698500a418f378d2d47589358b88d5a2dfd..fe73b4f8ab88cbbadda250f39b13232959011c92 100644 --- a/RAAspotter_pref83.py +++ b/RAAspotter_pref83.py @@ -88,8 +88,7 @@ class RAAspotter_pref83(RAAspotter): url = unquote(url) name = a.get_text().strip() date = datetime.datetime.strptime(a['title'].split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref87.py b/RAAspotter_pref87.py index e9814d07dd6d4c11970bdaec21e19818aafaa721..6436659d62c581e478996381eb7ff39e6a29c5ec 100644 --- a/RAAspotter_pref87.py +++ b/RAAspotter_pref87.py @@ -104,8 +104,7 @@ class RAAspotter_pref87(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/RAAspotter_pref976.py b/RAAspotter_pref976.py index ec78c39e1a73c732f031ae3afd2b8912534d5736..5d17bb0ea5cae33ef260d14c5490e7dd677f928d 100644 --- a/RAAspotter_pref976.py +++ b/RAAspotter_pref976.py @@ -115,8 +115,7 @@ class RAAspotter_pref976(RAAspotter): url = unquote(url) name = a.find('span').previous_sibling.replace('Télécharger ', '').strip() date = datetime.datetime.strptime(a.find('span').get_text().split(' - ')[-1].strip(), '%d/%m/%Y') - filename = url.split('/')[-1] - raa = RAAspotter.RAA(url, date, name, filename) + raa = RAAspotter.RAA(url, date, name) elements.append(raa) return elements diff --git a/cli.py b/cli.py index 52af7b8bb180c159fcc9b21cfe7cc7429d8f698b..31e7f470012891341800b452466670b3774cb644 100755 --- a/cli.py +++ b/cli.py @@ -218,9 +218,6 @@ else: if __PREF_EMAIL_TO and not __PREF_EMAIL_TO == '': __EMAIL_TO = f'{__EMAIL_TO},{__PREF_EMAIL_TO}' -# On crée le dossier de téléchargement -os.makedirs(__DATA_DIR, exist_ok=True) - module = importlib.import_module(f'RAAspotter_{args.pref}') raa_spotter = getattr(module, f'RAAspotter_{args.pref}')(__DATA_DIR)