From c250bd98d673fc3c9e441d1faf76eb9b748d3c41 Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Sat, 16 Mar 2024 19:25:18 +0100
Subject: [PATCH] =?UTF-8?q?RAAspotter:=20ajoute=20une=20fonction=20pour=20?=
 =?UTF-8?q?r=C3=A9cup=C3=A9rer=20une=20page=20sans=20Selenium?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 RAAspotter.py         | 14 +++++++++++---
 RAAspotter_ppparis.py |  3 +--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/RAAspotter.py b/RAAspotter.py
index e58e395..69d9314 100644
--- a/RAAspotter.py
+++ b/RAAspotter.py
@@ -41,7 +41,7 @@ class RAAspotter:
         self.sha256 = hashlib.sha256(self.filename.encode('utf-8')).hexdigest()
       return self.sha256
 
-  def __init__(self, data_dir):
+  def __init__(self, data_dir, user_agent=''):
     logger.debug('Initialisation de RAAspotter')
 
     self.session          = requests.Session()
@@ -49,6 +49,8 @@ class RAAspotter:
     self.found            = False
     self.output_file_path = os.path.dirname(os.path.abspath(__file__))+'/output.log'
 
+    self.update_user_agent(user_agent)
+
     f = open(self.output_file_path,'w')
     f.write('')
     f.close()
@@ -84,7 +86,6 @@ class RAAspotter:
     # On récupère les cookies du navigateur pour les réutiliser plus tard
     for cookie in browser.get_cookies():
       self.session.cookies.set(cookie['name'], cookie['value'])
-    self.session.headers.update({'User-Agent': self.user_agent})
 
     # On arrête le navigateur
     browser.quit()
@@ -101,10 +102,38 @@ class RAAspotter:
     f.write(data+"\n")
     f.close()
 
+  def get_page(self, url):
+    """Fetch a URL through the shared requests session, without Selenium.
+
+    The request goes out on ``self.session``, so it uses the headers and
+    cookies currently stored on that session.
+
+    Args:
+      url: address of the page or file to retrieve.
+
+    Returns:
+      The value returned by ``self.session.get(url)``.
+    """
+    return self.session.get(url)
+
+  def update_user_agent(self, user_agent):
+    """Record the User-Agent and apply it to the requests session.
+
+    An empty value is stored on the instance but not pushed to the session:
+    the constructor defaults to ``''``, and writing that into the headers
+    would replace the session's existing User-Agent with a blank one.
+
+    Args:
+      user_agent: User-Agent string to send with subsequent requests.
+    """
+    self.user_agent = user_agent
+    if user_agent:
+      self.session.headers.update({'User-Agent': user_agent})
+
   def download_file(self, raa):
     try:
       os.makedirs(os.path.dirname(f'{self.data_dir}{raa.get_sha256()}.pdf'), exist_ok=True)
-      file = self.session.get(raa.url)
+      file = self.get_page(raa.url)
       f = open(f'{self.data_dir}{raa.get_sha256()}.pdf','wb')
       f.write(file.content)
       f.close()
diff --git a/RAAspotter_ppparis.py b/RAAspotter_ppparis.py
index 73cc411..e0fab5a 100644
--- a/RAAspotter_ppparis.py
+++ b/RAAspotter_ppparis.py
@@ -11,8 +11,8 @@ class RAAspotter_ppparis(RAAspotter):
   __USER_AGENT   = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
 
   def __init__(self, data_dir):
-    super().__init__(data_dir)
-    self.user_agent = self.__USER_AGENT
+    """Initialise the base spotter with the User-Agent declared on this class."""
+    super().__init__(data_dir, user_agent=self.__USER_AGENT)
 
   def get_raa(self, keywords):
     self.print_output('RAAspotter_ppparis')
-- 
GitLab