article.py 5.56 KB
Newer Older
cynddl's avatar
cynddl committed
1
from django.db import models
cynddl's avatar
cynddl committed
2
from django.utils.translation import ugettext_lazy as _
3
from django.core.files.base import ContentFile
luxcem's avatar
luxcem committed
4

cynddl's avatar
cynddl committed
5
from taggit.managers import TaggableManager
luxcem's avatar
luxcem committed
6
from newspaper import Article as ArticleParser
luxcem's avatar
luxcem committed
7
from django_und.models import VoteMixin
8
from django_fsm import FSMField, transition, RETURN_VALUE
luxcem's avatar
luxcem committed
9

10
from io import BytesIO
11
from datetime import datetime
12
13
14
from tempfile import NamedTemporaryFile

from project.settings import env
15

cynddl's avatar
cynddl committed
16

17
18
# Vote score at which Article.upvote() reports 'DRAFT' as the next state.
ARTICLE_SCORE_THRESHOLD = 3

luxcem's avatar
luxcem committed
19
# (stored value, display label) pairs offered as choices for Article.status.
STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected")),
)

26
27
28
29
30
31
# (stored value, display label) pairs offered as choices for Article.lang.
# "NA" is the field default and stands for any language not listed here.
LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other")),
)

32
33
34
35
36
37
38
39
40
41
# Help texts attached to the Article form fields (url, title, extracts).
# The embedded newlines are part of the stored strings.
URL_HELP_TEXT = (
    "The URL should not contain any marketing tags. We\n"
    "automatically strip the most known tags."
)

TITLE_HELP_TEXT = (
    "Please remove non-necessary parts such as newspapers'\n"
    "names and leave only the article title."
)

EXTRACTS_HELP_TEXT = (
    "Please select short and helpful extracts from the\n"
    "article content. You should aim at around 500 characters. "
    "Use bracket ellipsis\n"
    "[…] to cut parts not required to understand the context."
)

luxcem's avatar
luxcem committed
42

luxcem's avatar
luxcem committed
43
class Article(VoteMixin):
    """An article referenced by URL and moderated through a status workflow.

    ``status`` is a django-fsm field that is only changed by the transition
    methods below:

    * ``NEW`` -> ``DRAFT``: :meth:`recover`, or :meth:`upvote` once
      ``und_score`` reaches ``ARTICLE_SCORE_THRESHOLD``;
    * ``DRAFT`` -> ``PUBLISHED``: :meth:`publish`;
    * ``NEW`` / ``DRAFT`` -> ``REJECTED``: :meth:`reject`.

    Voting itself is delegated to ``VoteMixin``. None of the transition
    methods defined here calls ``save()`` directly.
    """

    status = FSMField(default='NEW', choices=STATUS_CHOICES, protected=True)

    url = models.URLField("URL", help_text=URL_HELP_TEXT)
    lang = models.CharField(
        _("Language"), choices=LANG_CHOICES, default="NA", max_length=50)
    metadata = models.TextField(
        _("Opengraph metadata"), blank=True, null=True)
    screenshot = models.ImageField(
        _("Article screenshot"), blank=True, null=True)
    title = models.CharField(
        _("Article title"), max_length=255, default="",
        help_text=TITLE_HELP_TEXT)
    website = models.CharField(_("Website"), max_length=255, default="")
    extracts = models.TextField(
        _("Content extracts"), blank=True, null=True,
        help_text=EXTRACTS_HELP_TEXT)

    created_at = models.DateTimeField(_("Creation date"), auto_now_add=True)
    updated_at = models.DateTimeField(_("Last update"), auto_now=True)
    published_at = models.DateTimeField(
        _("Publication date"), blank=True, null=True)

    #: priority: True if the article has priority
    priority = models.BooleanField(default=False)

    tags = TaggableManager(blank=True)

    class Meta:
        verbose_name = _("Article")
        verbose_name_plural = _("Articles")
        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles")
        )

    def __str__(self):
        """Return the article title as its display name."""
        return self.title

    # Finite state logic

    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="can_change_status")
    def publish(self):
        """Move a DRAFT article to PUBLISHED and stamp ``published_at``."""
        # Imported here so this method does not depend on a file-level import.
        from django.utils import timezone

        # timezone.now() is aware when USE_TZ is on and naive otherwise, so
        # it always matches what the DateTimeField expects. A bare
        # datetime.now() is naive and gets reinterpreted under USE_TZ.
        self.published_at = timezone.now()

    @transition(field=status, source='NEW', target='DRAFT',
                permission="can_change_status")
    def recover(self):
        """Move a NEW article to DRAFT without any other change."""
        pass

    @transition(field=status, source=['NEW', 'DRAFT'], target='REJECTED',
                permission="can_change_status")
    def reject(self):
        """Move a NEW or DRAFT article to REJECTED."""
        pass

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="can_change_priority")
    def set_priority(self):
        """Flag a DRAFT article as having priority; the status is unchanged."""
        self.priority = True

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="can_change_priority")
    def unset_priority(self):
        """Clear the priority flag of a DRAFT article; status is unchanged."""
        self.priority = False

    # Both transitions require "can_vote", matching downvote(); the DRAFT one
    # previously declared no permission at all.
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="can_vote")
    @transition(field=status, source='NEW',
                target=RETURN_VALUE('NEW', 'DRAFT'), permission="can_vote")
    def upvote(self, by=None):
        """Register an upvote and report the resulting state.

        The vote is recorded by ``VoteMixin.upvote``. The return value feeds
        the ``RETURN_VALUE`` transition declared for NEW articles.

        :param by: forwarded unchanged to ``VoteMixin.upvote``.
        :return: ``'DRAFT'`` when ``und_score`` has reached
            ``ARTICLE_SCORE_THRESHOLD``, otherwise the current status.
        """
        super().upvote(by)
        if self.und_score >= ARTICLE_SCORE_THRESHOLD:
            return 'DRAFT'
        return self.status

    @transition(field=status, source='NEW', target='NEW', permission="can_vote")
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="can_vote")
    def downvote(self, by=None):
        """Register a downvote via ``VoteMixin``; the status is unchanged.

        :param by: forwarded unchanged to ``VoteMixin.downvote``.
        """
        super().downvote(by)

    # Content extraction

    def fetch_content(self):
        """Download and parse ``url``, then fill ``title`` and ``extracts``.

        The instance is modified in memory only; this method does not save.
        """
        # Only pass an explicit language when one is known. "NA" used to be
        # forwarded as language=None, which newspaper is expected to reject
        # because it validates explicit codes as two-letter strings.
        parser_options = {}
        if self.lang != "NA":
            parser_options["language"] = self.lang.lower()

        article = ArticleParser(url=self.url, **parser_options)
        article.download()
        article.parse()
        self.title = article.title
        self.extracts = article.text

    def fetch_screenshot(self):
        """Capture ``url`` in Firefox and store a thumbnail in ``screenshot``.

        Firefox runs inside a virtual display. The optional
        ``FIREFOX_BINARY_PATH`` and ``FIREFOX_PROFILE_PATH`` attributes of
        ``env`` select the binary and profile. The capture is reduced to fit
        within 240x360 and saved as PNG data, which also saves the model
        (``save=True``). The instance must already have an ``id``, since it
        is formatted into the file name.
        """
        from selenium import webdriver
        from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
        from PIL import Image
        from pyvirtualdisplay import Display

        with Display(visible=False, size=(3200, 1800)):
            binary = None
            if hasattr(env, "FIREFOX_BINARY_PATH"):
                binary = FirefoxBinary(env.FIREFOX_BINARY_PATH)

            profile = None
            if hasattr(env, "FIREFOX_PROFILE_PATH"):
                profile = webdriver.FirefoxProfile(env.FIREFOX_PROFILE_PATH)

            driver = webdriver.Firefox(profile, firefox_binary=binary)
            try:
                driver.set_window_size(1200, 1800)
                driver.get(self.url)
                # A single in-memory capture is enough; the earlier extra
                # capture into a temporary file was never read back.
                screen = driver.get_screenshot_as_png()
            finally:
                # Always stop the browser, even if loading or capturing fails,
                # so no Firefox process is left behind.
                driver.quit()

            im = Image.open(BytesIO(screen))
            im.thumbnail((240, 360))
            im_io = BytesIO()
            im.save(im_io, format="PNG")
            self.screenshot.save(
                "screenshot-%i" % self.id, ContentFile(im_io.getvalue()),
                save=True)