From 64b7dc1efbfa5a451691c951472fa59f29bb58a7 Mon Sep 17 00:00:00 2001 From: Bastien Le Querrec <blq@laquadrature.net> Date: Sun, 7 Apr 2024 12:30:48 +0200 Subject: [PATCH] =?UTF-8?q?RAAspotter:=20relance=20les=20requ=C3=AAtes=20e?= =?UTF-8?q?n=20timeout?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #9 --- RAAspotter.py | 25 +++++++++++++++++-------- RAAspotter_ppparis.py | 4 +--- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/RAAspotter.py b/RAAspotter.py index 470c421..1829441 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -10,6 +10,7 @@ import datetime from urllib.parse import quote from selenium import webdriver +from selenium.common.exceptions import TimeoutException from selenium.webdriver.common.by import By from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions @@ -239,7 +240,7 @@ class RAAspotter: return False # On démarre le navigateur - def get_session(self, url, wait_element=""): + def get_session(self, url, wait_element, remaining_retries=0): webdriver_options = webdriver.ChromeOptions() webdriver_options.add_argument("--no-sandbox") webdriver_options.add_argument("--disable-extensions") @@ -261,17 +262,25 @@ class RAAspotter: # Téléchargement de l'URL browser.get(url) - if not wait_element == "": + if wait_element is not None: # On attend que le navigateur ait passé les tests anti-robots et # que le contenu s'affiche - WebDriverWait(browser, 120).until( - expected_conditions.presence_of_element_located( - ( - By.ID, - wait_element + try: + WebDriverWait(browser, 60).until( + expected_conditions.presence_of_element_located( + ( + By.ID, + wait_element + ) ) ) - ) + except TimeoutException as exc: + logger.warning(f'TimeoutException: {exc}') + if remaining_retries > 0: + return self.get_session(url, wait_element, (remaining_retries - 1)) + else: + raise TimeoutException(exc) + page_content = browser.page_source # On récupère les cookies du navigateur pour les réutiliser plus tard diff --git a/RAAspotter_ppparis.py b/RAAspotter_ppparis.py index 326a1b8..36be16e 100644 --- a/RAAspotter_ppparis.py +++ b/RAAspotter_ppparis.py @@ -24,7 +24,7 @@ class RAAspotter_ppparis(RAAspotter): self.print_output(f'Termes recherchés: {keywords}') self.print_output('') - page_content = self.get_session() + page_content = self.get_session(self.__RAA_PAGE, self.__WAIT_ELEMENT, 3) raa_elements = self.get_raa_elements(page_content) self.parse_raa(raa_elements, keywords.split(',')) self.mailer() @@ -52,5 +52,3 @@ class RAAspotter_ppparis(RAAspotter): elements.append(raa) return elements - def get_session(self): - return super().get_session(self.__RAA_PAGE, self.__WAIT_ELEMENT) -- GitLab