diff --git a/RAAspotter.py b/RAAspotter.py index e58e3957c50a8a7ffed9a8a0ca843550f71be023..69d9314af26554dc0b78d4f58a05dc53d99ba39d 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -41,7 +41,7 @@ class RAAspotter: self.sha256 = hashlib.sha256(self.filename.encode('utf-8')).hexdigest() return self.sha256 - def __init__(self, data_dir): + def __init__(self, data_dir, user_agent=''): logger.debug('Initialisation de RAAspotter') self.session = requests.Session() @@ -49,6 +49,8 @@ class RAAspotter: self.found = False self.output_file_path = os.path.dirname(os.path.abspath(__file__))+'/output.log' + self.update_user_agent(user_agent) + f = open(self.output_file_path,'w') f.write('') f.close() @@ -84,7 +86,6 @@ class RAAspotter: # On récupère les cookies du navigateur pour les réutiliser plus tard for cookie in browser.get_cookies(): self.session.cookies.set(cookie['name'], cookie['value']) - self.session.headers.update({'User-Agent': self.user_agent}) # On arrête le navigateur browser.quit() @@ -101,10 +102,17 @@ class RAAspotter: f.write(data+"\n") f.close() + def get_page(self, url): + return self.session.get(url) + + def update_user_agent(self, user_agent): + self.user_agent = user_agent + self.session.headers.update({'User-Agent': self.user_agent}) + def download_file(self, raa): try: os.makedirs(os.path.dirname(f'{self.data_dir}{raa.get_sha256()}.pdf'), exist_ok=True) - file = self.session.get(raa.url) + file = self.get_page(raa.url) f = open(f'{self.data_dir}{raa.get_sha256()}.pdf','wb') f.write(file.content) f.close() diff --git a/RAAspotter_ppparis.py b/RAAspotter_ppparis.py index 73cc41178411e86968e9e490598e121ba9b35554..e0fab5a9650dafb0dd4d4de2d7253517ff307d48 100644 --- a/RAAspotter_ppparis.py +++ b/RAAspotter_ppparis.py @@ -11,8 +11,7 @@ class RAAspotter_ppparis(RAAspotter): __USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36' def __init__(self, data_dir): - super().__init__(data_dir) - self.user_agent = self.__USER_AGENT + super().__init__(data_dir, self.__USER_AGENT) def get_raa(self, keywords): self.print_output('RAAspotter_ppparis')