diff --git a/RAAspotter.py b/RAAspotter.py index 470c4215945f4bcd3aba7821ff25cf92a778517e..18294414af111e6a20eb5a7f02013bf2452ed0a8 100644 --- a/RAAspotter.py +++ b/RAAspotter.py @@ -10,6 +10,7 @@ import datetime from urllib.parse import quote from selenium import webdriver +from selenium.common.exceptions import TimeoutException from selenium.webdriver.common.by import By from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions @@ -239,7 +240,7 @@ class RAAspotter: return False # On démarre le navigateur - def get_session(self, url, wait_element=""): + def get_session(self, url, wait_element, remaining_retries=0): webdriver_options = webdriver.ChromeOptions() webdriver_options.add_argument("--no-sandbox") webdriver_options.add_argument("--disable-extensions") @@ -261,17 +262,25 @@ class RAAspotter: # Téléchargement de l'URL browser.get(url) - if not wait_element == "": + if wait_element is not None: # On attend que le navigateur ait passé les tests anti-robots et # que le contenu s'affiche - WebDriverWait(browser, 120).until( - expected_conditions.presence_of_element_located( - ( - By.ID, - wait_element + try: + WebDriverWait(browser, 60).until( + expected_conditions.presence_of_element_located( + ( + By.ID, + wait_element + ) ) ) - ) + except TimeoutException as exc: + logger.warning(f'TimeoutException: {exc}') + if remaining_retries > 0: + return self.get_session(url, wait_element, (remaining_retries - 1)) + else: + raise TimeoutException(exc) + page_content = browser.page_source # On récupère les cookies du navigateur pour les réutiliser plus tard diff --git a/RAAspotter_ppparis.py b/RAAspotter_ppparis.py index 326a1b8b8bc0ba8e633f2016a4f9698fbc176cce..36be16e12f211cc672ccc33e0e232a511ccac4df 100644 --- a/RAAspotter_ppparis.py +++ b/RAAspotter_ppparis.py @@ -24,7 +24,7 @@ class RAAspotter_ppparis(RAAspotter): self.print_output(f'Termes recherchés: {keywords}') self.print_output('') - page_content = self.get_session() + page_content = self.get_session(self.__RAA_PAGE, self.__WAIT_ELEMENT, 3) raa_elements = self.get_raa_elements(page_content) self.parse_raa(raa_elements, keywords.split(',')) self.mailer() @@ -52,5 +52,3 @@ class RAAspotter_ppparis(RAAspotter): elements.append(raa) return elements - def get_session(self): - return super().get_session(self.__RAA_PAGE, self.__WAIT_ELEMENT)