models.py 8.94 KB
Newer Older
cynddl's avatar
cynddl committed
1
from django.db import models
cynddl's avatar
cynddl committed
2
from django.utils.translation import ugettext_lazy as _
3
from django.core import files
luxcem's avatar
luxcem committed
4

cynddl's avatar
cynddl committed
5
from taggit.managers import TaggableManager
6
from newspaper import Article as ArticleParser, ArticleException
7
from django_fsm import FSMField, transition, RETURN_VALUE
luxcem's avatar
luxcem committed
8

9
from io import BytesIO
10
from datetime import datetime
11

cynddl's avatar
cynddl committed
12
from rp.utils import cleanup_url
13

cynddl's avatar
cynddl committed
14

15
16
#: Score threshold consulted by ``Article.upvote`` when deciding whether a
#: _NEW_ article is promoted to _DRAFT_ (the check there compares the score
#: against this value minus one).
ARTICLE_SCORE_THRESHOLD = 3

luxcem's avatar
luxcem committed
17
#: Workflow states of an article, as (stored value, translated label) pairs.
#: Used as the ``choices`` of ``Article.status``.
STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected"))
)

24
25
26
27
28
29
#: Article languages, as (stored value, translated label) pairs. Used as the
#: ``choices`` of ``Article.lang``, whose default is "NA".
LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other"))
)

30
31
32
33
34
35
36
37
38
39
#: Help text attached to the ``Article.url`` field.
URL_HELP_TEXT = """The URL should not contain any marketing tags. We
automatically strip the most known tags."""

#: Help text attached to the ``Article.title`` field.
TITLE_HELP_TEXT = """Please remove non-necessary parts such as newspapers'
names and leave only the article title."""

#: Help text attached to the ``Article.extracts`` field.
EXTRACTS_HELP_TEXT = """Please select short and helpful extracts from the
article content. You should aim at around 500 characters. Use bracket ellipsis
[…] to cut parts not required to understand the context."""

40
41

class Article(models.Model):
cynddl's avatar
cynddl committed
42
    #: Logical state (eg. article submitted, published, or rejected).
    # This is unprotected because superusers should be able to change
    # the status from the Django admin interface.
    status = FSMField(default='NEW', choices=STATUS_CHOICES)

    #: Original URL
    url = models.URLField("URL", help_text=URL_HELP_TEXT)

    #: Language of the webpage
    lang = models.CharField(
        _("Language"), choices=LANG_CHOICES, default="NA", max_length=50)

    #: Plain-text Opengraph metadata
    metadata = models.TextField(
        _("Opengraph metadata"), blank=True, null=True)

    #: Screenshot or banner image for the original webpage
    screenshot = models.ImageField(
        _("Article screenshot"), blank=True, null=True)

    #: Article title
    title = models.CharField(
        _("Article title"), max_length=255, default="",
        help_text=TITLE_HELP_TEXT)

    #: Short name for the website (eg. "NY Times")
    website = models.CharField(_("Website"), max_length=255, default="")

    #: Short content extracts (eg. two to three paragraphs)
    extracts = models.TextField(
        _("Content extracts"), blank=True, null=True,
        help_text=EXTRACTS_HELP_TEXT)

    #: First submission date
    created_at = models.DateTimeField(_("Creation date"), auto_now_add=True)

    #: Name of the user who first submitted the article
    created_by = models.CharField(max_length=255, null=True)

    #: Last update date
    updated_at = models.DateTimeField(_("Last update"), auto_now=True)

    #: Publication date
    published_at = models.DateTimeField(
        _("Publication date"), blank=True, null=True)

    #: Status code recorded when trying to fetch the original page.
    # ``add_new_url`` stores the HTTP status code of the page (or 600 when
    # the request itself fails) and the ``fetch_*`` methods store 400 when
    # the download fails.
    # The default is the integer 200, not the string "200": the default is
    # assigned as-is to new instances, and the ``fetch_*`` methods compare
    # this value numerically.
    original_status = models.IntegerField(_("Original status"), default=200)

    #: Priority flag: True if the article has priority
    priority = models.BooleanField(default=False)

    #: Short tags describing the article (eg: "Privacy", "Copyright").
    tags = TaggableManager(blank=True)

    #: Score of the article, modified by the upvote and downvote methods
    score = models.IntegerField(default=0)

100
101
102
    class Meta:
        # Human-readable names of the model.
        verbose_name = _("Article")
        verbose_name_plural = _("Articles")

        # Custom permissions as (codename, description) pairs. The state
        # transitions of this model refer to the first three of them as
        # "rp.<codename>".
        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles")
        )

        #: By default, sort articles by published, updated, then created
        #: date, most recent first.
        ordering = ["-published_at", "-updated_at", "-created_at"]

luxcem's avatar
luxcem committed
114
    def __str__(self):
        """ Use the article title as the string representation. """
        title = self.title
        return title
dave's avatar
dave committed
117

118
    # Finite state logic
119

120
    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="rp.can_change_status")
    def publish(self):
        """
        Publish a complete draft.

        Moves the article from _DRAFT_ to _PUBLISHED_ and stamps
        ``published_at`` with the current time. The instance is not saved
        by this method.
        """
        # Imported locally, like the other function-level imports of this
        # module. ``timezone.now()`` returns an aware datetime when USE_TZ
        # is enabled and a naive one otherwise, so the value always matches
        # what the DateTimeField expects.
        from django.utils import timezone

        self.published_at = timezone.now()

126
    @transition(
        field=status,
        source=['NEW', 'REJECTED'],
        target='DRAFT',
        permission="rp.can_change_status",
    )
    def recover(self):
        """
        Bring a _NEW_ or _REJECTED_ article to the _DRAFT_ state.

        The method body is intentionally empty: only the state transition
        declared by the decorator takes place.
        """

    @transition(
        field=status,
        source=['NEW', 'DRAFT'],
        target='REJECTED',
        permission="rp.can_change_status",
    )
    def reject(self):
        """
        Manually reject a _NEW_ or _DRAFT_ article.

        The method body is intentionally empty: only the state transition
        declared by the decorator takes place.
        """

    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_change_priority",
    )
    def set_priority(self):
        """ Flag a _DRAFT_ article as having priority (state unchanged). """
        self.priority = True

    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_change_priority",
    )
    def unset_priority(self):
        """ Clear the priority flag of a _DRAFT_ article (state unchanged). """
        self.priority = False
149
150
151

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    @transition(field=status, source='NEW',
                target=RETURN_VALUE('NEW', 'DRAFT'), permission="rp.can_vote")
    def upvote(self):
        """
        Increase the article score by one.

        A _NEW_ article is moved to _DRAFT_ as soon as its score reaches
        ``ARTICLE_SCORE_THRESHOLD - 1``; otherwise it keeps its current
        state. A _DRAFT_ article always stays _DRAFT_. Both transitions
        require the ``rp.can_vote`` permission, like ``downvote``.

        Returns the state the article should have after the vote. The value
        only matters for the _NEW_ transition, whose target is a
        ``RETURN_VALUE``. The instance is not saved by this method.
        """
        self.score += 1
        # NOTE(review): the comparison is against the threshold minus one,
        # presumably because the initial submission counts as a first vote;
        # confirm before changing.
        if self.score >= ARTICLE_SCORE_THRESHOLD - 1:
            return 'DRAFT'
        return self.status

165
166
    @transition(field=status, source='NEW', target='NEW',
                permission="rp.can_vote")
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    def downvote(self, by=None):
        """
        Decrease the article score by one.

        The state never changes: a _NEW_ article stays _NEW_ and a _DRAFT_
        article stays _DRAFT_ (it is not moved back in the _NEW_ queue).
        The ``by`` argument is accepted but not used by the body.
        """
        self.score = self.score - 1
176

177
    @classmethod
    def add_new_url(cls, by=None, **data):
        """
        Manually add a new article from its URL.

        The URL is normalised with ``cleanup_url`` and the article is looked
        up (or created) by that URL. An article that already exists is
        upvoted, unless it was rejected.

        Arguments:
            by: submitting user; accepted but not used by the body.
            **data: ``url`` (the page to add), optional ``tags`` (an
                iterable of tag names, or a comma separated string) and any
                other field values used as defaults on creation.

        Returns the saved article, or ``None`` when the URL belongs to an
        article that has been rejected.
        """
        import requests

        url = cleanup_url(data.pop('url', None))
        tags = data.pop('tags', None)
        (article, created) = cls.objects.get_or_create(url=url,
                                                       defaults=data)

        # If the article was already there, we should upvote it
        if not created:
            if article.status == "REJECTED":
                return None
            # ``upvote`` is only declared for the NEW and DRAFT states;
            # calling it from any other state (eg. PUBLISHED) would raise.
            if article.status in ("NEW", "DRAFT"):
                article.upvote()

        # Let's add the tags: one argument per tag name, otherwise the
        # whole list would be stored as a single comma-joined tag.
        if tags:
            if isinstance(tags, str):
                tags = [name.strip() for name in tags.split(',')]
            names = [name for name in tags if name]
            if names:
                article.tags.add(*names)

        try:
            r = requests.get(url, timeout=0.5)
            article.original_status = r.status_code
        except Exception:
            # Deliberately broad (best effort): if the domain name can't be
            # found, we're not even getting into the HTTP protocol. So,
            # let's get a specific status for that, one that can be easily
            # identified.
            article.original_status = 600

        article.save()
        return article

210
    # Content extraction
211
    def fetch_content(self):
        """
        Download the original page and store its title and text.

        Does nothing when ``original_status`` is 400 or above. When the
        download fails, ``original_status`` is set to 400 and saved.
        Otherwise the parsed title and text are stored in ``title`` and
        ``extracts`` and the article is saved.
        """
        # Nothing to fetch for a page already recorded as failing.
        if self.original_status >= 400:
            return

        # The request timeout is given as a parser option at construction
        # time, next to the language, rather than to ``download()``.
        options = {"request_timeout": 1}
        if self.lang != "NA":
            options["language"] = self.lang.lower()
        article = ArticleParser(url=self.url, **options)

        article.download()
        try:
            article.throw_if_not_downloaded_verbose()
        except ArticleException:
            self.original_status = 400
            self.save()
            return

        article.parse()
        self.title = article.title
        self.extracts = article.text
        self.save()
232

233
234
235
    def fetch_metadata(self):
        """
        Store the Opengraph metadata of the original page as JSON.

        Does nothing when ``original_status`` is 400 or above; otherwise
        the metadata is written to ``metadata`` and the article is saved.
        """
        import opengraph_py3 as og

        if self.original_status >= 400:
            return

        graph = og.OpenGraph(url=self.url, )
        self.metadata = graph.to_json()
        self.save()
240

241
242
243
244
245
246
247
248
249
    def fetch_image(self):
        """
        Download the page's meta image and store it as the screenshot.

        Does nothing when ``original_status`` is 400 or above. When the
        page download fails, ``original_status`` is set to 400 and saved.
        The image is only stored when the page declares one, the image
        request succeeds and the payload is a recognised image format.
        """
        import requests
        # NOTE: imghdr is deprecated since Python 3.11 and removed in 3.13.
        import imghdr

        # Nothing to fetch for a page already recorded as failing.
        if self.original_status >= 400:
            return

        if self.lang != "NA":
            article = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            article = ArticleParser(url=self.url)

        article.download()
        try:
            article.throw_if_not_downloaded_verbose()
        except ArticleException:
            self.original_status = 400
            self.save()
            return

        article.parse()

        img_path = article.meta_img
        if not img_path:
            return

        # A timeout keeps an unresponsive image host from blocking forever;
        # the streamed response is always closed to release the connection.
        resp = requests.get(img_path, stream=True, timeout=5)
        try:
            if resp.status_code != requests.codes.ok:
                return
            payload = resp.content
        finally:
            resp.close()

        file_name_ext = imghdr.what(None, payload)
        if file_name_ext is None:
            # Not a recognised image format: skip it instead of saving a
            # file named "screenshot-<id>.None".
            return

        self.screenshot.save(
            "screenshot-{0}.{1}".format(self.id, file_name_ext),
            files.File(BytesIO(payload)), save=True)