models.py 6.88 KB
Newer Older
cynddl's avatar
cynddl committed
1
from django.db import models
cynddl's avatar
cynddl committed
2
from django.utils.translation import ugettext_lazy as _
3
from django.core import files
4
from django.core.files.base import ContentFile
luxcem's avatar
luxcem committed
5

cynddl's avatar
cynddl committed
6
from taggit.managers import TaggableManager
luxcem's avatar
luxcem committed
7
from newspaper import Article as ArticleParser
luxcem's avatar
luxcem committed
8
from django_und.models import VoteMixin
9
from django_fsm import FSMField, transition, RETURN_VALUE
luxcem's avatar
luxcem committed
10

11
from io import BytesIO
12
from datetime import datetime
13 14 15
from tempfile import NamedTemporaryFile

from project.settings import env
cynddl's avatar
cynddl committed
16
from rp.utils import cleanup_url
17

cynddl's avatar
cynddl committed
18

19 20
# Vote score (``und_score``) at which Article.upvote() promotes a NEW article
# to DRAFT.
ARTICLE_SCORE_THRESHOLD = 3

luxcem's avatar
luxcem committed
21
# Workflow states of an article, as (stored value, translated label) pairs.
STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected")),
)

28 29 30 31 32 33
# Supported article languages, as (stored value, translated label) pairs.
# "NA" stands for any other language.
LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other")),
)

34 35 36 37 38 39 40 41 42 43
# Help texts shown next to the corresponding form fields. The embedded
# newlines are part of the strings and are kept exactly as authored.
URL_HELP_TEXT = (
    "The URL should not contain any marketing tags. We\n"
    "automatically strip the most known tags."
)

TITLE_HELP_TEXT = (
    "Please remove non-necessary parts such as newspapers'\n"
    "names and leave only the article title."
)

EXTRACTS_HELP_TEXT = (
    "Please select short and helpful extracts from the\n"
    "article content. You should aim at around 500 characters. "
    "Use bracket ellipsis\n"
    "[…] to cut parts not required to understand the context."
)

luxcem's avatar
luxcem committed
44

luxcem's avatar
luxcem committed
45
class Article(VoteMixin):
    """An article referenced by URL, voted on and moved through the
    NEW / DRAFT / PUBLISHED / REJECTED workflow."""

    # Workflow state. ``protected=True`` asks django-fsm to refuse direct
    # assignment, so the value changes through the @transition methods.
    status = FSMField(
        default='NEW',
        choices=STATUS_CHOICES,
        protected=True,
    )

    # Source page and the data extracted from it.
    url = models.URLField(verbose_name="URL", help_text=URL_HELP_TEXT)
    lang = models.CharField(
        verbose_name=_("Language"),
        choices=LANG_CHOICES,
        default="NA",
        max_length=50,
    )
    metadata = models.TextField(
        verbose_name=_("Opengraph metadata"),
        blank=True,
        null=True,
    )
    screenshot = models.ImageField(
        verbose_name=_("Article screenshot"),
        blank=True,
        null=True,
    )
    title = models.CharField(
        verbose_name=_("Article title"),
        max_length=255,
        default="",
        help_text=TITLE_HELP_TEXT,
    )
    website = models.CharField(
        verbose_name=_("Website"),
        max_length=255,
        default="",
    )
    extracts = models.TextField(
        verbose_name=_("Content extracts"),
        blank=True,
        null=True,
        help_text=EXTRACTS_HELP_TEXT,
    )

    # Bookkeeping timestamps and authorship.
    created_at = models.DateTimeField(
        verbose_name=_("Creation date"),
        auto_now_add=True,
    )
    # String form of the submitter, filled in by add_new_url().
    created_by = models.CharField(max_length=255, null=True)
    updated_at = models.DateTimeField(
        verbose_name=_("Last update"),
        auto_now=True,
    )
    # Filled in by publish().
    published_at = models.DateTimeField(
        verbose_name=_("Publication date"),
        blank=True,
        null=True,
    )

    #: priority: True if the article has priority
    priority = models.BooleanField(default=False)

    tags = TaggableManager(blank=True)
73

74 75 76
    class Meta:
        # Human-readable model names.
        verbose_name = _("Article")
        verbose_name_plural = _("Articles")

        # Custom permissions. can_change_status, can_change_priority and
        # can_vote are the ones referenced (as "rp.<codename>") by the
        # @transition decorators of this model.
        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles"),
        )

        # Default ordering: latest publication first, then latest update,
        # then latest creation.
        ordering = ["-published_at", "-updated_at", "-created_at"]

luxcem's avatar
luxcem committed
86 87
    def __str__(self):
        """Return the article title as the human-readable representation."""
        return self.title
dave's avatar
dave committed
88

89
    # Finite state logic
90

91
    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="rp.can_change_status")
    def publish(self):
        """Move a DRAFT article to PUBLISHED and stamp the publication time.

        The timestamp comes from ``django.utils.timezone.now()`` instead of
        ``datetime.now()``: it is timezone-aware when ``USE_TZ`` is enabled
        and a naive local datetime otherwise, so it always has the form the
        ``DateTimeField`` expects.

        This method does not save the instance.
        """
        # Imported here so the method does not depend on a new module-level
        # import.
        from django.utils import timezone

        self.published_at = timezone.now()

    @transition(
        field=status,
        source='NEW',
        target='DRAFT',
        permission="rp.can_change_status",
    )
    def recover(self):
        """Move a NEW article to DRAFT; the transition does nothing else."""

    @transition(
        field=status,
        source=['NEW', 'DRAFT'],
        target='REJECTED',
        permission="rp.can_change_status",
    )
    def reject(self):
        """Move a NEW or DRAFT article to REJECTED; nothing else changes."""

    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_change_priority",
    )
    def set_priority(self):
        """Flag a DRAFT article as having priority; the status stays DRAFT."""
        self.priority = True

    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_change_priority",
    )
    def unset_priority(self):
        """Clear the priority flag of a DRAFT article; the status stays DRAFT."""
        self.priority = False
115 116 117

    # NOTE(review): unlike downvote(), the DRAFT -> DRAFT transition below
    # declares no permission -- confirm this is intended.
    @transition(field=status, source='DRAFT', target='DRAFT')
    @transition(
        field=status,
        source='NEW',
        target=RETURN_VALUE('NEW', 'DRAFT'),
        permission="rp.can_vote",
    )
    def upvote(self, by=None):
        """Record an upvote from ``by`` and decide whether to promote.

        The vote itself is delegated to the parent class. For an article in
        NEW, the returned value selects the resulting state (RETURN_VALUE):
        'DRAFT' once ``und_score`` reaches ARTICLE_SCORE_THRESHOLD, otherwise
        the current status. For an article already in DRAFT the target is
        fixed to DRAFT.
        """
        super().upvote(by)
        promoted = self.und_score >= ARTICLE_SCORE_THRESHOLD
        return 'DRAFT' if promoted else self.status

cynddl's avatar
cynddl committed
126
    @transition(
        field=status,
        source='NEW',
        target='NEW',
        permission="rp.can_vote",
    )
    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_vote",
    )
    def downvote(self, by=None):
        """Record a downvote from ``by``; a NEW or DRAFT article keeps its
        status. The vote itself is delegated to the parent class."""
        super().downvote(by)
131

cynddl's avatar
cynddl committed
132 133 134 135 136
    @staticmethod
    def add_new_url(url, by=None):
        """Register ``url`` as an article, reusing an existing one if present.

        Declared as a static method because it takes no ``self``: it can be
        called as ``Article.add_new_url(url, by)`` as before, and now also
        works when called on an instance.

        Args:
            url: Address of the article; it is passed through
                ``cleanup_url`` before the lookup.
            by: Whoever submits the URL, or None. When given, its string
                form is recorded as ``created_by`` if the article has none
                yet, and the article is upvoted on the submitter's behalf.

        Returns:
            The saved ``Article`` for the cleaned URL.
        """
        url = cleanup_url(url)
        # Named ``_created`` rather than ``_`` so the module-level gettext
        # alias is not shadowed inside this function.
        article, _created = Article.objects.get_or_create(url=url)

        if by is not None:
            # Only record a real submitter: str(None) would store the literal
            # "None" and prevent a later, genuine attribution.
            if article.created_by is None:
                article.created_by = str(by)
            article.upvote(by)

        article.save()
        return article

145
    # Content extraction
146

147
    def fetch_content(self):
        """Download the page at ``self.url`` and store its title and text.

        The parser is told the article language unless it is "NA". The parsed
        title and text overwrite ``title`` and ``extracts``, and the instance
        is saved.
        """
        parser_options = {"url": self.url}
        if self.lang != "NA":
            parser_options["language"] = self.lang.lower()

        parsed = ArticleParser(**parser_options)
        parsed.download()
        parsed.parse()

        self.title, self.extracts = parsed.title, parsed.text
        self.save()
158

159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
    def fetch_image(self):
        """Store the page's ``meta_img`` image in the ``screenshot`` field.

        The page at ``self.url`` is downloaded and parsed (with the article
        language unless it is "NA"). If the parser reports a ``meta_img`` and
        fetching it succeeds, the bytes are saved as
        ``screenshot-<id>.<type>``, and the instance is saved with it. When
        the image type cannot be identified the extension is left out. In
        every other case the method does nothing.
        """
        import imghdr
        import requests

        if self.lang != "NA":
            article = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            article = ArticleParser(url=self.url)

        article.download()
        article.parse()

        img_url = article.meta_img
        if not img_url:
            return

        # The body is read in full below, so streaming buys nothing; per the
        # requests documentation it also delays releasing the connection when
        # the body is never consumed (the non-OK path).
        # NOTE(review): no timeout is passed, so a stalled server blocks this
        # call indefinitely -- consider adding one.
        resp = requests.get(img_url)
        if resp.status_code != requests.codes.ok:
            return

        payload = resp.content
        file_name = "screenshot-{0}".format(self.id)
        # imghdr.what() returns None for content it cannot identify; only
        # append an extension when one is known instead of writing ".None".
        extension = imghdr.what(None, payload)
        if extension:
            file_name = "{0}.{1}".format(file_name, extension)

        # ContentFile wraps the bytes directly, as fetch_screenshot does,
        # rather than handing over a BytesIO positioned at its end.
        self.screenshot.save(file_name, ContentFile(payload), save=True)



185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
    def fetch_screenshot(self):
        """Render ``self.url`` in Firefox and store a PNG thumbnail of it.

        Firefox runs inside a virtual display. ``env.FIREFOX_BINARY_PATH``
        and ``env.FIREFOX_PROFILE_PATH`` are used when the settings define
        them. The capture is shrunk to fit within 240x360 and saved to the
        ``screenshot`` field as ``screenshot-<id>``, which also saves the
        instance.
        """
        from selenium import webdriver
        from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
        from PIL import Image
        from pyvirtualdisplay import Display

        # The capture is taken in memory only; the earlier extra
        # save_screenshot() to a temporary file was never read back.
        with Display(visible=False, size=(3200, 1800)):
            binary = None
            if hasattr(env, "FIREFOX_BINARY_PATH"):
                binary = FirefoxBinary(env.FIREFOX_BINARY_PATH)

            profile = None
            if hasattr(env, "FIREFOX_PROFILE_PATH"):
                profile = webdriver.FirefoxProfile(env.FIREFOX_PROFILE_PATH)

            driver = webdriver.Firefox(profile, firefox_binary=binary)
            try:
                driver.set_window_size(1200, 1800)
                driver.get(self.url)
                screen = driver.get_screenshot_as_png()
            finally:
                # Shut the browser down even when loading or capturing fails,
                # so no Firefox process is left behind.
                driver.quit()

            im = Image.open(BytesIO(screen))
            # thumbnail() resizes in place to fit within the given box.
            im.thumbnail((240, 360))
            im_io = BytesIO()
            im.save(im_io, format="PNG")
            self.screenshot.save(
                "screenshot-%i" % self.id, ContentFile(im_io.getvalue()),
                save=True)