Skip to content
Extraits de code Groupes Projets
Valider 02d0d660 rédigé par Bastien Le Querrec's avatar Bastien Le Querrec
Parcourir les fichiers

RAAspotter: ajout de la possibilité de changer d'identité Tor

parent 0052efcc
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -10,11 +10,12 @@ from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions
from bs4 import BeautifulSoup
from pyvirtualdisplay import Display
from pdfminer.high_level import extract_text
from stem import Signal
from stem.control import Controller
import hashlib
import smtplib
from email.message import EmailMessage
......@@ -52,6 +53,9 @@ class RAAspotter:
self.found = False
self.output_file_path = os.path.dirname(os.path.abspath(__file__))+'/output.log'
self.sleep_time = 0
self.tor_enabled = False
self.tor_max_requests = 0
self.tor_requests = 0
self.update_user_agent(user_agent)
......@@ -59,17 +63,34 @@ class RAAspotter:
f.write('')
f.close()
def enable_tor(self, max_requests=0):
    """Route the session's HTTP and HTTPS traffic through the local Tor proxy.

    Args:
        max_requests: Stored in ``self.tor_max_requests``. ``get_page``
            compares the request counter against it to decide when to ask
            for a new Tor identity; 0 (the default) never triggers a
            renewal.
    """
    # NOTE(review): the "socks5h" scheme is presumably chosen so hostnames
    # are resolved by the proxy rather than locally - confirm that
    # self.session is a requests.Session with SOCKS support installed.
    tor_proxy = "socks5h://localhost:9050"

    self.tor_enabled = True
    self.tor_max_requests = max_requests
    # Restart the count: a fresh activation starts a new request budget.
    self.tor_requests = 0
    self.session.proxies.update({"http": tor_proxy, "https": tor_proxy})
def disable_tor(self):
    """Stop routing the session through Tor and reset the Tor counters.

    Removes the "http" and "https" proxy entries that ``enable_tor``
    installs. Updating the proxy mapping with an empty dict would leave
    those entries in place, so requests would silently keep going through
    Tor; the keys have to be removed explicitly.
    """
    self.tor_enabled = False
    self.tor_max_requests = 0
    self.tor_requests = 0
    # Only drop the schemes enable_tor sets; any other proxy entry is kept.
    for scheme in ("http", "https"):
        self.session.proxies.pop(scheme, None)
def tor_get_new_id(self):
    """Ask the local Tor control port (9051) for a new identity.

    Sends the NEWNYM signal, then waits 3 seconds and resets
    ``self.tor_requests``. This is best effort: if the controller cannot
    be reached, authenticated or signalled, the failure is logged at debug
    level and the request counter is left untouched.
    """
    logger.info('Changement d\'identité Tor')
    try:
        # The context manager closes the control connection even when
        # authentication or the signal fails.
        with Controller.from_port(port=9051) as controller:
            controller.authenticate()
            controller.signal(Signal.NEWNYM)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate; exc_info keeps the cause visible.
        logger.debug('Impossible de changer d\'identité Tor', exc_info=True)
    else:
        # Pause after a successful signal before resuming requests.
        # NOTE(review): presumably to let Tor set up the new circuit -
        # confirm the 3 s value.
        time.sleep(3)
        self.tor_requests = 0
def get_sub_pages(self, page_content, element, host=""):
soup = BeautifulSoup(page_content, 'html.parser')
sub_pages = []
......@@ -143,9 +164,17 @@ class RAAspotter:
f.close()
def get_page(self, url):
    """Fetch ``url`` with the session and return the response.

    Sleeps ``self.sleep_time`` seconds first when that value is positive.
    While Tor is enabled, every call increments ``self.tor_requests``;
    once the counter exceeds a positive ``self.tor_max_requests``, a new
    Tor identity is requested through ``tor_get_new_id``.

    Args:
        url: Address of the page to load.

    Returns:
        Whatever ``self.session.get(url)`` returns.
    """
    logger.debug(f'Chargement de la page {url}')
    if self.sleep_time > 0:
        time.sleep(self.sleep_time)

    # Keep the response in a local: returning session.get() directly would
    # make the Tor accounting below unreachable.
    page = self.session.get(url)

    if self.tor_enabled:
        self.tor_requests += 1
        # NOTE(review): with ">" the identity is renewed after
        # tor_max_requests + 1 requests - confirm this is intended.
        if self.tor_max_requests > 0 and self.tor_requests > self.tor_max_requests:
            self.tor_get_new_id()

    return page
def update_user_agent(self, user_agent):
self.user_agent = user_agent
......
......@@ -3,3 +3,4 @@ selenium
pyvirtualdisplay
pdfminer.six
requests
stem
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter