models.py 5.97 KB
Newer Older
cynddl's avatar
cynddl committed
1
from django.db import models
cynddl's avatar
cynddl committed
2
from django.utils.translation import ugettext_lazy as _
3
from django.core.files.base import ContentFile
luxcem's avatar
luxcem committed
4

cynddl's avatar
cynddl committed
5
from taggit.managers import TaggableManager
luxcem's avatar
luxcem committed
6
from newspaper import Article as ArticleParser
luxcem's avatar
luxcem committed
7
from django_und.models import VoteMixin
8
from django_fsm import FSMField, transition, RETURN_VALUE
luxcem's avatar
luxcem committed
9

10
from io import BytesIO
11
from datetime import datetime
12 13 14
from tempfile import NamedTemporaryFile

from project.settings import env
cynddl's avatar
cynddl committed
15
from rp.utils import cleanup_url
16

cynddl's avatar
cynddl committed
17

18 19
# Vote score at (or above) which Article.upvote() moves a NEW article to DRAFT.
ARTICLE_SCORE_THRESHOLD = 3

luxcem's avatar
luxcem committed
20
# Workflow states accepted by Article.status, as (stored value, label) pairs.
# Labels are kept as literal _("...") calls so they stay extractable for
# translation.
STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected")),
)

27 28 29 30 31 32
# Language codes accepted by Article.lang, as (stored value, label) pairs.
# "NA" (labelled "Other") is the field's default.
LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other"))
)

33 34 35 36 37 38 39 40 41 42
# Help texts attached to the Article form fields (url, title, extracts).
# The "\n" line breaks are part of the runtime strings and are kept verbatim.
URL_HELP_TEXT = (
    "The URL should not contain any marketing tags. We\n"
    "automatically strip the most known tags."
)

TITLE_HELP_TEXT = (
    "Please remove non-necessary parts such as newspapers'\n"
    "names and leave only the article title."
)

EXTRACTS_HELP_TEXT = (
    "Please select short and helpful extracts from the\n"
    "article content. You should aim at around 500 characters. Use bracket ellipsis\n"
    "[…] to cut parts not required to understand the context."
)

luxcem's avatar
luxcem committed
43

luxcem's avatar
luxcem committed
44
class Article(VoteMixin):
    """An article referenced by URL, voted on and moved through a workflow.

    ``status`` is a protected ``FSMField`` and is meant to change only through
    the ``@transition`` methods below:

    * NEW -> DRAFT (``recover``, or ``upvote`` once the score threshold is hit)
    * DRAFT -> PUBLISHED (``publish``)
    * NEW / DRAFT -> REJECTED (``reject``)
    """

    status = FSMField(default='NEW', choices=STATUS_CHOICES, protected=True)

    url = models.URLField("URL", help_text=URL_HELP_TEXT)
    lang = models.CharField(
        _("Language"), choices=LANG_CHOICES, default="NA", max_length=50)
    metadata = models.TextField(
        _("Opengraph metadata"), blank=True, null=True)
    screenshot = models.ImageField(
        _("Article screenshot"), blank=True, null=True)
    title = models.CharField(
        _("Article title"), max_length=255, default="",
        help_text=TITLE_HELP_TEXT)
    website = models.CharField(_("Website"), max_length=255, default="")
    extracts = models.TextField(
        _("Content extracts"), blank=True, null=True,
        help_text=EXTRACTS_HELP_TEXT)

    created_at = models.DateTimeField(_("Creation date"), auto_now_add=True)
    updated_at = models.DateTimeField(_("Last update"), auto_now=True)
    published_at = models.DateTimeField(
        _("Publication date"), blank=True, null=True)

    #: priority: True if the article has priority
    priority = models.BooleanField(default=False)

    tags = TaggableManager(blank=True)

    class Meta:
        verbose_name = _("Article")
        verbose_name_plural = _("Articles")
        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles")
        )

    def __str__(self):
        """Return the article title."""
        return self.title

    # Finite state logic

    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="rp.can_change_status")
    def publish(self):
        """Move a DRAFT article to PUBLISHED and stamp ``published_at``."""
        # timezone.now() is aware when USE_TZ is enabled and naive otherwise,
        # so it is safe under both settings (datetime.now() is always naive).
        from django.utils import timezone
        self.published_at = timezone.now()

    @transition(field=status, source='NEW', target='DRAFT',
                permission="rp.can_change_status")
    def recover(self):
        """Move a NEW article to DRAFT."""
        pass

    @transition(field=status, source=['NEW', 'DRAFT'], target='REJECTED',
                permission="rp.can_change_status")
    def reject(self):
        """Move a NEW or DRAFT article to REJECTED."""
        pass

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def set_priority(self):
        """Flag a DRAFT article as having priority (status stays DRAFT)."""
        self.priority = True

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def unset_priority(self):
        """Clear the priority flag of a DRAFT article (status stays DRAFT)."""
        self.priority = False

    # NOTE(review): the DRAFT -> DRAFT upvote transition declares no
    # permission, unlike both downvote transitions — confirm this is intended.
    @transition(field=status, source='DRAFT', target='DRAFT')
    @transition(field=status, source='NEW',
                target=RETURN_VALUE('NEW', 'DRAFT'), permission="rp.can_vote")
    def upvote(self, by=None):
        """Record an upvote and pick the resulting state.

        :param by: forwarded unchanged to the parent class ``upvote``.
        :returns: ``'DRAFT'`` once ``und_score`` reaches
            ``ARTICLE_SCORE_THRESHOLD``, otherwise the current status. For a
            NEW article this value selects the ``RETURN_VALUE`` target.
        """
        super(Article, self).upvote(by)
        if self.und_score >= ARTICLE_SCORE_THRESHOLD:
            return 'DRAFT'
        return self.status

    @transition(field=status, source='NEW', target='NEW',
                permission="rp.can_vote")
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    def downvote(self, by=None):
        """Record a downvote; the status (NEW or DRAFT) is left unchanged.

        :param by: forwarded unchanged to the parent class ``downvote``.
        """
        super(Article, self).downvote(by)

    @staticmethod
    def add_new_url(url, by=None):
        """Fetch or create the article for ``url`` and upvote it.

        The URL is first normalised with ``cleanup_url``. Declared as a static
        method because it takes no instance; ``Article.add_new_url(...)``
        keeps working as before.

        :param url: address of the article.
        :param by: optional voter; when given, the article is upvoted with it
            and, if the article has no ``created_by`` yet, recorded there.
        :returns: the saved ``Article``.
        """
        url = cleanup_url(url)
        # The "created" flag is unused; named so it does not hide the
        # module-level gettext alias ``_``.
        article, _created = Article.objects.get_or_create(url=url)

        # NOTE(review): ``created_by`` is not declared in this class;
        # presumably provided by VoteMixin — confirm.
        if article.created_by is None:
            article.created_by = by

        if by is not None:
            article.upvote(by)

        article.save()
        return article

    # Content extraction

    def fetch_content(self):
        """Download the article and store its title and text, then save.

        The lower-cased ``lang`` code is passed to the parser unless the
        language is "NA", in which case the parser is built without a
        language argument.
        """
        if self.lang != "NA":
            parser = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            parser = ArticleParser(url=self.url)

        parser.download()
        parser.parse()
        self.title = parser.title
        self.extracts = parser.text
        self.save()

    def fetch_screenshot(self):
        """Render ``url`` in Firefox and save a PNG thumbnail to ``screenshot``.

        Firefox runs inside a virtual display. The binary and profile paths
        are taken from ``env`` when it exposes ``FIREFOX_BINARY_PATH`` /
        ``FIREFOX_PROFILE_PATH``. The capture is shrunk to fit 240x360 and
        stored under the name ``screenshot-<id>``; the model is saved.
        """
        # Heavy optional dependencies are imported only when actually needed.
        from selenium import webdriver
        from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
        from PIL import Image
        from pyvirtualdisplay import Display

        with NamedTemporaryFile() as f, Display(visible=False, size=(3200, 1800)):
            binary = None
            if hasattr(env, "FIREFOX_BINARY_PATH"):
                binary = FirefoxBinary(env.FIREFOX_BINARY_PATH)

            profile = None
            if hasattr(env, "FIREFOX_PROFILE_PATH"):
                profile = webdriver.FirefoxProfile(env.FIREFOX_PROFILE_PATH)

            driver = webdriver.Firefox(profile, firefox_binary=binary)
            try:
                driver.set_window_size(1200, 1800)

                driver.get(self.url)
                # NOTE(review): the temp-file copy is never read back; only
                # the in-memory PNG below is used — looks redundant, confirm.
                driver.save_screenshot(f.name)
                screen = driver.get_screenshot_as_png()
            finally:
                # Always shut the browser down, even when loading the page or
                # taking the screenshot fails.
                driver.quit()

            im = Image.open(BytesIO(screen))
            im.thumbnail((240, 360))
            im_io = BytesIO()
            im.save(im_io, format="PNG")
            self.screenshot.save(
                "screenshot-%i" % self.id, ContentFile(im_io.getvalue()),
                save=True)