models.py 10.6 KB
Newer Older
cynddl's avatar
cynddl committed
1
from django.db import models
cynddl's avatar
cynddl committed
2
from django.utils.translation import ugettext_lazy as _
3
from django.core import files
4
from django.contrib.auth.decorators import permission_required
luxcem's avatar
luxcem committed
5

cynddl's avatar
cynddl committed
6
from taggit.managers import TaggableManager
7
from newspaper import Article as ArticleParser, ArticleException
8
from django_fsm import FSMField, transition, RETURN_VALUE
luxcem's avatar
luxcem committed
9

10
from io import BytesIO
11
from datetime import datetime
12

cynddl's avatar
cynddl committed
13
from rp.utils import cleanup_url
14

cynddl's avatar
cynddl committed
15

16
17
# Score at or above which ``Article.upvote`` returns 'DRAFT' for a NEW article.
ARTICLE_SCORE_THRESHOLD = 3

luxcem's avatar
luxcem committed
18
# Workflow states of an article, as (stored value, translated label) pairs.
# Used as the ``choices`` of ``Article.status``.
STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected"))
)

25
26
27
28
29
30
# Languages selectable for ``Article.lang``, as (stored value, translated
# label) pairs. "NA" is the field default and makes the fetch methods build
# the parser without a ``language`` argument.
LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other"))
)

31
32
33
34
35
36
37
38
39
40
# Help text attached to the ``url`` field.
URL_HELP_TEXT = """The URL should not contain any marketing tags. We
automatically strip the most known tags."""

# Help text attached to the ``title`` field.
TITLE_HELP_TEXT = """Please remove non-necessary parts such as newspapers'
names and leave only the article title."""

# Help text attached to the ``extracts`` field.
EXTRACTS_HELP_TEXT = """Please select short and helpful extracts from the
article content. You should aim at around 500 characters. Use bracket ellipsis
[…] to cut parts not required to understand the context."""

41
42

class Article(models.Model):
    """
    A press article identified by its URL and moved through a small
    workflow (see ``STATUS_CHOICES``) by the transition methods below.
    """

    #: Logical state (eg. article submitted, published, or rejected)
    # This is unprotected because superuser should be able to change
    # the status from the django admin interface
    status = FSMField(default='NEW', choices=STATUS_CHOICES)

    #: Original URL
    url = models.URLField("URL", help_text=URL_HELP_TEXT)

    #: Language of the webpage
    lang = models.CharField(
        _("Language"), choices=LANG_CHOICES, default="NA", max_length=50)

    #: Plain-text Opengraph metadata
    metadata = models.TextField(
        _("Opengraph metadata"), blank=True, null=True)

    #: Screenshot or banner image for the original webpage
    screenshot = models.ImageField(
        _("Article screenshot"), blank=True, null=True)

    #: Article title
    title = models.CharField(
        _("Article title"), max_length=255, default="",
        help_text=TITLE_HELP_TEXT)

    #: Short name for the website (eg. "NY Times")
    website = models.CharField(_("Website"), max_length=255, default="")

    #: Short content extracts (eg. two to three paragraphs)
    extracts = models.TextField(
        _("Content extracts"), blank=True, null=True,
        help_text=EXTRACTS_HELP_TEXT)

    #: First submission date
    created_at = models.DateTimeField(_("Creation date"), auto_now_add=True)

    #: Name of the user who first submitted the article
    created_by = models.CharField(max_length=255, null=True)

    #: Last update date
    updated_at = models.DateTimeField(_("Last update"), auto_now=True)

    #: Published date
    published_at = models.DateTimeField(
        _("Publication date"), blank=True, null=True)

    #: Status recorded when fetching the original page (HTTP status code, or
    #: an error marker set by the fetching code).
    # The default must be an int, not the string "200": the fetch methods
    # compare this attribute with ``>= 400`` and an instance that has not been
    # reloaded from the database still carries the raw default value.
    original_status = models.IntegerField(_("Original status"), default=200)

    #: priority: True if the article has priority
    priority = models.BooleanField(default=False)

    #: List of tags used to add subject and topics to an article
    tags = TaggableManager(blank=True)

    #: Score of the article, modified by the upvote and downvote methods
    score = models.IntegerField(default=0)

    #: If the publication is "archived" (not visible by default from the public
    #: feeds), this flag is set to True
    archive = models.BooleanField(_("Article archived"), default=False)

    #: If the article is quoting something LQDN said or wrote
    quote = models.BooleanField(_("Article directly quotes us"),
                                default=False)

    #: If the article speaks about something LQDN did or wrote
    speak = models.BooleanField(_("Article speaks of us"), default=False)

112
113
114
    class Meta:
        # Translated singular / plural names of the model.
        verbose_name = _("Article")
        verbose_name_plural = _("Articles")

        # Custom permissions as (codename, description) pairs. The codenames
        # match the "rp.can_*" permissions named by the transition decorators
        # on this model (presumably "rp" is the app label - confirm).
        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles")
        )

        #: By default, sort articles by published, updated, or created date
        ordering = ["-published_at", "-updated_at", "-created_at"]

luxcem's avatar
luxcem committed
126
    def __str__(self):
        """ Return the article title as its string representation. """
        return self.title
dave's avatar
dave committed
129

130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
    # Flags logic
    def toggle_speak(self):
        """Invert the ``speak`` flag, then save the article."""
        flipped = not self.speak
        self.speak = flipped
        self.save()

    def toggle_archive(self):
        """Invert the ``archive`` flag, then save the article."""
        flipped = not self.archive
        self.archive = flipped
        self.save()

    def toggle_quote(self):
        """Invert the ``quote`` flag, then save the article."""
        flipped = not self.quote
        self.quote = flipped
        self.save()

146
147
148
    @transition(field=status, source=['DRAFT', 'NEW', 'PUBLISHED'],
                target=RETURN_VALUE('DRAFT', 'NEW', 'PUBLISHED',),
                permission="rp.can_edit")
    def set_flags(self, archive=False, speak=False, quote=False):
        """
        Set _all_ the flags at once, each to the value given for it as an
        argument, and save the article. Any flag that is not passed falls
        back to False, so calling this without arguments unsets every flag.

        Returns the current status, which is handed back to the transition
        as its target value.
        """
        self.archive, self.speak, self.quote = archive, speak, quote
        self.save()
        return self.status
159

160
    # Finite state logic
161
    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="rp.can_change_status")
    def publish(self):
        """ Publish a complete draft.

        Records the current time in ``published_at``. This method does not
        call ``save()`` itself.
        """
        # Function-scope import, in line with the other local imports used by
        # the methods of this model.
        from django.utils import timezone

        # timezone.now() is timezone-aware when USE_TZ is enabled and naive
        # otherwise, so the stored value always matches what the
        # DateTimeField expects (a bare datetime.now() is always naive).
        self.published_at = timezone.now()

167
    @transition(
        field=status,
        source=['NEW', 'REJECTED'],
        target='DRAFT',
        permission="rp.can_change_status",
    )
    def recover(self):
        """ Force a _NEW_ or _REJECTED_ article to be considered as _DRAFT_.

        The body is intentionally empty: the status change is carried
        entirely by the transition decorator.
        """

    @transition(
        field=status,
        source=['NEW', 'DRAFT'],
        target='REJECTED',
        permission="rp.can_change_status",
    )
    def reject(self):
        """ Manually reject a _NEW_ or _DRAFT_ article.

        The body is intentionally empty: the status change is carried
        entirely by the transition decorator.
        """

    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_change_priority",
    )
    def set_priority(self):
        """ Flag the article as having priority.

        Only allowed on a _DRAFT_ article, which stays a _DRAFT_. The change
        is not saved by this method.
        """
        self.priority = True

    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_change_priority",
    )
    def unset_priority(self):
        """ Remove the priority flag from the article.

        Only allowed on a _DRAFT_ article, which stays a _DRAFT_. The change
        is not saved by this method.
        """
        self.priority = False
192
193
194

    # Both transitions require the voting permission, as the downvote
    # transitions do; the DRAFT one previously declared no permission at all.
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    @transition(field=status, source='NEW',
                target=RETURN_VALUE('NEW', 'DRAFT'), permission="rp.can_vote")
    def upvote(self):
        """
        Increase the article score by one.

        For a _NEW_ article, if the score reaches the threshold
        ```ARTICLE_SCORE_THRESHOLD```, the article automatically moves from
        _NEW_ to _DRAFT_; otherwise it keeps its current status. A _DRAFT_
        article always stays a _DRAFT_.

        The new score is not saved by this method.

        Returns the status to move to: 'DRAFT' once the threshold is
        reached, the current status otherwise.
        """
        self.score += 1
        if self.score >= ARTICLE_SCORE_THRESHOLD:
            return 'DRAFT'
        return self.status

208
209
    @transition(
        field=status,
        source='NEW',
        target='NEW',
        permission="rp.can_vote",
    )
    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_vote",
    )
    def downvote(self, by=None):
        """
        Decrease the article score by one.

        The status never changes: draft articles can be downvoted but will
        not be moved back in the _NEW_ queue. The ``by`` argument is accepted
        but not used, and the new score is not saved by this method.
        """
        self.score = self.score - 1
219

220
    @classmethod
    def add_new_url(cls, by=None, **data):
        """ Manually add a new article from its URL.

        Looks the article up by its cleaned URL and creates it from the
        remaining ``data`` when it does not exist yet, then upvotes it.

        ``data`` may contain:
          - ``url``: the article URL, passed through ``cleanup_url``;
          - ``tags``: an iterable of dicts carrying a ``name`` key;
          - any other key: used as field defaults when creating the article.

        ``by`` is accepted but not used here.

        Returns the saved article, or None when the article had been
        rejected before (in which case nothing is changed).
        """
        import requests

        url = cleanup_url(data.pop('url', None))
        tags = data.pop('tags', None)
        (article, _created) = cls.objects.get_or_create(url=url,
                                                         defaults=data)

        # Always upvote the article, except if it was rejected before.
        # It's either a new one, and adding it count as a vote, or it existed
        # already, and this is a vote.
        if article.status == "REJECTED":
            return None
        article.upvote()

        # Let's add the tags: one tag per name. Joining the names into a
        # single comma-separated string would be stored as one tag whose name
        # contains the commas.
        if tags:
            names = [t['name'] for t in tags if t]
            if names:
                article.tags.add(*names)

        try:
            r = requests.get(url, timeout=0.5)
            article.original_status = r.status_code
        except Exception:
            # Deliberately broad: this probe is best-effort and must not
            # prevent the article from being saved.
            # If the domain name can't be found, we're not even getting into
            # the HTTP protocol So, let's get a specific status for that,
            # one that can be easily identified.
            article.original_status = 600

        article.url = url
        article.save()
        return article

256
    # Content extraction
257
    def fetch_content(self):
        """
        Download the original page and fill ``title`` and ``extracts`` from
        the parsed result, then save.

        Nothing is fetched when ``original_status`` is 400 or above. When the
        download fails, ``original_status`` is set to 400, the article is
        saved and the method stops there.
        """
        # Only pass a language to the parser when one is known.
        parser_options = {"url": self.url}
        if self.lang != "NA":
            parser_options["language"] = self.lang.lower()
        article = ArticleParser(**parser_options)

        if self.original_status >= 400:
            return

        # NOTE(review): confirm that the installed newspaper version accepts
        # ``request_timeout`` as an argument of ``download``.
        article.download(request_timeout=1)
        try:
            article.throw_if_not_downloaded_verbose()
        except ArticleException:
            self.original_status = 400
            self.save()
            return

        article.parse()
        self.title, self.extracts = article.title, article.text
        self.save()
278

279
280
281
    def fetch_metadata(self):
        """
        Store the Opengraph metadata of the original page, serialised as
        JSON, in ``metadata`` and save. Does nothing when ``original_status``
        is 400 or above.
        """
        import opengraph_py3 as og

        if self.original_status >= 400:
            return

        opengraph = og.OpenGraph(url=self.url)
        self.metadata = opengraph.to_json()
        self.save()
286

287
288
289
290
291
292
293
294
295
    def fetch_image(self):
        """
        Download the image advertised by the original page (the parser's
        ``meta_img``) and store it as the article ``screenshot``.

        Nothing is done when ``original_status`` is 400 or above, when the
        page advertises no image, when the image request does not answer
        with an OK status, or when the downloaded bytes are not a recognised
        image format. When the page download fails, ``original_status`` is
        set to 400 and the article is saved.
        """
        import requests
        # NOTE(review): imghdr is deprecated since Python 3.11 and removed in
        # 3.13; a replacement will be needed before upgrading that far.
        import imghdr

        if self.lang != "NA":
            article = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            article = ArticleParser(url=self.url)

        if self.original_status >= 400:
            return

        article.download()
        try:
            article.throw_if_not_downloaded_verbose()
        except ArticleException:
            self.original_status = 400
            self.save()
            return

        article.parse()

        img_path = article.meta_img
        if not img_path:
            return

        # NOTE(review): this request has no timeout, unlike the other
        # requests made by this model - consider adding one.
        resp = requests.get(img_path, stream=True)
        try:
            if resp.status_code != requests.codes.ok:
                return
            content = resp.content
        finally:
            # The response is streamed, so release the connection explicitly,
            # including when the body is never read.
            resp.close()

        # imghdr returns None for bytes it cannot identify; saving then would
        # produce a file named "screenshot-<id>.None" that is not an image.
        file_name_ext = imghdr.what(None, content)
        if file_name_ext is None:
            return

        self.screenshot.save(
            "screenshot-{0}.{1}".format(self.id, file_name_ext),
            files.File(BytesIO(content)), save=True)