Commit 5ab9a7df authored by cynddl

Add tentative screenshot routine using Selenium and Firefox

parent 9f0d9b45
from django.db import models
from django.utils.translation import ugettext_lazy as _
from django.core.files.base import ContentFile
from taggit.managers import TaggableManager
from newspaper import Article as ArticleParser
from django_und.models import VoteMixin
from django_fsm import FSMField, transition, RETURN_VALUE
from io import BytesIO
from datetime import datetime
from tempfile import NamedTemporaryFile
from project.settings import env
# NOTE(review): presumably the vote score an Article must reach before some
# state change happens; the code that reads it is outside this excerpt.
ARTICLE_SCORE_THRESHOLD = 3
......@@ -114,12 +119,41 @@ class Article(VoteMixin):
# Content extraction
def parse(self):
def fetch_content(self):
    """Download the page at ``self.url`` and store its title and body text.

    The page is fetched and parsed with newspaper's ``Article`` class
    (imported here as ``ArticleParser``). The extracted title is written to
    ``self.title`` and the extracted text to ``self.extracts``. The instance
    is not saved by this method.
    """
    # Forward a language hint only when one is actually known ("NA" is the
    # placeholder for "no language").
    # NOTE(review): newspaper's configuration appears to reject a None or
    # empty language code, so the keyword is omitted rather than passed as
    # None -- confirm against the installed newspaper version.
    parser_kwargs = {}
    if self.lang and self.lang != "NA":
        parser_kwargs["language"] = self.lang.lower()

    article = ArticleParser(url=self.url, **parser_kwargs)
    article.download()
    article.parse()

    self.title = article.title
    self.extracts = article.text
    # TODO: save metadata
def fetch_screenshot(self):
    """Render ``self.url`` in Firefox and store a thumbnail of the page.

    A Firefox instance is driven through Selenium inside a pyvirtualdisplay
    ``Display``. The page is captured as PNG bytes, shrunk with PIL to fit
    within 240x360 pixels, and written to the ``self.screenshot`` file field
    under the name ``screenshot-<id>`` with ``save=True``.

    If ``env`` exposes ``FIREFOX_BINARY_PATH`` and/or
    ``FIREFOX_PROFILE_PATH``, they select the Firefox binary and profile;
    otherwise ``None`` is passed to the driver for each.
    """
    # Heavy optional dependencies are imported lazily so that importing this
    # module does not require them.
    from selenium import webdriver
    from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
    from PIL import Image
    from pyvirtualdisplay import Display

    with Display(visible=False, size=(3200, 1800)):
        binary = None
        if hasattr(env, "FIREFOX_BINARY_PATH"):
            binary = FirefoxBinary(env.FIREFOX_BINARY_PATH)

        profile = None
        if hasattr(env, "FIREFOX_PROFILE_PATH"):
            profile = webdriver.FirefoxProfile(env.FIREFOX_PROFILE_PATH)

        driver = webdriver.Firefox(profile, firefox_binary=binary)
        try:
            driver.set_window_size(1200, 1800)
            driver.get(self.url)
            # The PNG bytes are taken straight from the driver; no
            # intermediate file on disk is needed.
            screen = driver.get_screenshot_as_png()
        finally:
            # Always shut the browser down, even when loading the page or
            # capturing it fails, so no Firefox process is left behind.
            driver.quit()

    # Thumbnailing needs neither the browser nor the virtual display.
    im = Image.open(BytesIO(screen))
    im.thumbnail((240, 360))  # resizes in place, keeping the aspect ratio
    im_io = BytesIO()
    im.save(im_io, format="PNG")

    self.screenshot.save(
        "screenshot-%i" % self.id, ContentFile(im_io.getvalue()),
        save=True)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment