From 64b7dc1efbfa5a451691c951472fa59f29bb58a7 Mon Sep 17 00:00:00 2001
From: Bastien Le Querrec <blq@laquadrature.net>
Date: Sun, 7 Apr 2024 12:30:48 +0200
Subject: [PATCH] =?UTF-8?q?RAAspotter:=20relance=20les=20requ=C3=AAtes=20e?=
 =?UTF-8?q?n=20timeout?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

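Retry page loads that time out while waiting for the anti-bot check.

get_session() now takes a remaining_retries argument (default 0) and
calls itself again on TimeoutException until no retries remain, then
re-raises. The wait timeout is lowered from 120 s to 60 s per attempt,
and wait_element becomes a required argument (pass None to skip the
wait). RAAspotter_ppparis now calls get_session() directly with 3
retries instead of going through its own override.

Fixes #9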
---
 RAAspotter.py         | 25 +++++++++++++++++--------
 RAAspotter_ppparis.py |  4 +---
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/RAAspotter.py b/RAAspotter.py
index 470c421..1829441 100644
--- a/RAAspotter.py
+++ b/RAAspotter.py
@@ -10,6 +10,7 @@ import datetime
 from urllib.parse import quote
 
 from selenium import webdriver
+from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions
@@ -239,7 +240,7 @@ class RAAspotter:
         return False
 
     # On démarre le navigateur
-    def get_session(self, url, wait_element=""):
+    def get_session(self, url, wait_element, remaining_retries=0):
         webdriver_options = webdriver.ChromeOptions()
         webdriver_options.add_argument("--no-sandbox")
         webdriver_options.add_argument("--disable-extensions")
@@ -261,17 +262,25 @@ class RAAspotter:
         # Téléchargement de l'URL
         browser.get(url)
 
-        if not wait_element == "":
+        if wait_element is not None:
             # On attend que le navigateur ait passé les tests anti-robots et
             # que le contenu s'affiche
-            WebDriverWait(browser, 120).until(
-                expected_conditions.presence_of_element_located(
-                    (
-                        By.ID,
-                        wait_element
+            try:
+                WebDriverWait(browser, 60).until(
+                    expected_conditions.presence_of_element_located(
+                        (
+                            By.ID,
+                            wait_element
+                        )
                     )
                 )
-            )
+            except TimeoutException as exc:
+                logger.warning(f'TimeoutException: {exc}')
+                if remaining_retries > 0:
+                    return self.get_session(url, wait_element, (remaining_retries - 1))
+                else:
+                    raise TimeoutException(exc)
+
         page_content = browser.page_source
 
         # On récupère les cookies du navigateur pour les réutiliser plus tard
diff --git a/RAAspotter_ppparis.py b/RAAspotter_ppparis.py
index 326a1b8..36be16e 100644
--- a/RAAspotter_ppparis.py
+++ b/RAAspotter_ppparis.py
@@ -24,7 +24,7 @@ class RAAspotter_ppparis(RAAspotter):
         self.print_output(f'Termes recherchés: {keywords}')
         self.print_output('')
 
-        page_content = self.get_session()
+        page_content = self.get_session(self.__RAA_PAGE, self.__WAIT_ELEMENT, 3)
         raa_elements = self.get_raa_elements(page_content)
         self.parse_raa(raa_elements, keywords.split(','))
         self.mailer()
@@ -52,5 +52,3 @@ class RAAspotter_ppparis(RAAspotter):
                 elements.append(raa)
         return elements
 
-    def get_session(self):
-        return super().get_session(self.__RAA_PAGE, self.__WAIT_ELEMENT)
-- 
GitLab