models.py 9.9 KB
Newer Older
cynddl's avatar
cynddl committed
1
from django.db import models
cynddl's avatar
cynddl committed
2
from django.utils.translation import ugettext_lazy as _
3
from django.core import files
luxcem's avatar
luxcem committed
4

cynddl's avatar
cynddl committed
5
from taggit.managers import TaggableManager
6
from newspaper import Article as ArticleParser, ArticleException
7
from django_fsm import FSMField, transition, RETURN_VALUE
luxcem's avatar
luxcem committed
8

9
from io import BytesIO
10
from datetime import datetime
11

cynddl's avatar
cynddl committed
12
from rp.utils import cleanup_url
13

cynddl's avatar
cynddl committed
14

15
16
#: Score at or above which ``Article.upvote`` returns the ``DRAFT`` state.
ARTICLE_SCORE_THRESHOLD = 3

luxcem's avatar
luxcem committed
17
#: Workflow states of an article, as ``(stored value, translated label)``
#: pairs; used as the ``choices`` of ``Article.status``.
STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected")),
)

24
25
26
27
28
29
#: Languages selectable for an article, as ``(stored value, translated
#: label)`` pairs; ``"NA"`` is the default of ``Article.lang``.
LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other")),
)

30
31
32
33
34
35
36
37
38
39
#: Help text attached to the ``Article.url`` field.
URL_HELP_TEXT = (
    "The URL should not contain any marketing tags. We\n"
    "automatically strip the most known tags."
)

#: Help text attached to the ``Article.title`` field.
TITLE_HELP_TEXT = (
    "Please remove non-necessary parts such as newspapers'\n"
    "names and leave only the article title."
)

#: Help text attached to the ``Article.extracts`` field.
EXTRACTS_HELP_TEXT = (
    "Please select short and helpful extracts from the\n"
    "article content. You should aim at around 500 characters. "
    "Use bracket ellipsis\n"
    "[…] to cut parts not required to understand the context."
)

40
41

class Article(models.Model):
    """Press article submitted by URL and moved through a review workflow.

    The ``status`` field is a finite-state machine (django-fsm) over the
    values of ``STATUS_CHOICES``; the decorated methods below are its
    transitions. The ``fetch_*`` methods pull title, extracts, Opengraph
    metadata and a banner image from the original web page.
    """

    #: Logical state (eg. article submitted, published, or rejected)
    # This is unprotected because superuser should be able to change
    # the status from the django admin interface
    status = FSMField(default='NEW', choices=STATUS_CHOICES)

    #: Original URL
    url = models.URLField("URL", help_text=URL_HELP_TEXT)

    #: Language of the webpage
    lang = models.CharField(
        _("Language"), choices=LANG_CHOICES, default="NA", max_length=50)

    #: Plain-text Opengraph metadata
    metadata = models.TextField(
        _("Opengraph metadata"), blank=True, null=True)

    #: Screenshot or banner image for the original webpage
    screenshot = models.ImageField(
        _("Article screenshot"), blank=True, null=True)

    #: Article title
    title = models.CharField(
        _("Article title"), max_length=255, default="",
        help_text=TITLE_HELP_TEXT)

    #: Short name for the website (eg. "NY Times")
    website = models.CharField(_("Website"), max_length=255, default="")

    #: Short content extracts (eg. two to three paragraphs)
    extracts = models.TextField(
        _("Content extracts"), blank=True, null=True,
        help_text=EXTRACTS_HELP_TEXT)

    #: First submission date
    created_at = models.DateTimeField(_("Creation date"), auto_now_add=True)

    #: Name of the user who first submitted the article
    created_by = models.CharField(max_length=255, null=True)

    #: Last update date
    updated_at = models.DateTimeField(_("Last update"), auto_now=True)

    #: Published date
    published_at = models.DateTimeField(
        _("Publication date"), blank=True, null=True)

    #: Original state (HTTP status code obtained when fetching the URL;
    #: ``add_new_url`` stores 600 when the request itself failed)
    # NOTE(review): the default is the string "200" on an IntegerField, so
    # an unsaved instance holds a str until it is reloaded from the
    # database. Switching to the integer 200 looks right but would need a
    # matching migration - confirm before changing.
    original_status = models.IntegerField(_("Original status"), default="200")

    #: priority: True if the article has priority
    priority = models.BooleanField(default=False)

    #: List of tags used to add subject and topics to an article
    tags = TaggableManager(blank=True)

    #: Score of the article, modified by the upvote and downvote methods
    score = models.IntegerField(default=0)

    #: If the publication is "archived" (not visible by default from the public
    #: feeds), this flag is set to True
    archive = models.BooleanField(_("Article archived"), default=False)

    #: If the article is quoting something LQDN said or wrote
    quote = models.BooleanField(_("Article directly quotes us"),
                                default=False)

    #: If the article speaks about something LQDN did or wrote
    speak = models.BooleanField(_("Article speaks of us"), default=False)

    class Meta:
        verbose_name = _("Article")
        verbose_name_plural = _("Articles")

        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles")
        )

        #: By default, sort articles by published, updated, or created date
        ordering = ["-published_at", "-updated_at", "-created_at"]

    def __str__(self):
        """Return the article title, or its URL when the title is empty."""
        return self.title or self.url

    # Finite state logic
    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="rp.can_change_status")
    def publish(self):
        """Publish a complete draft and stamp its publication date."""
        # Imported locally so that the module-level imports stay untouched.
        from django.utils import timezone

        # timezone.now() is timezone-aware when USE_TZ is enabled and falls
        # back to a naive datetime.now() otherwise, so it is safe under
        # either setting (a naive value triggers a warning with USE_TZ on).
        self.published_at = timezone.now()

    @transition(field=status, source=['NEW', 'REJECTED'], target='DRAFT',
                permission="rp.can_change_status")
    def recover(self):
        """Force an article to be considered as _DRAFT_."""

    @transition(field=status, source=['NEW', 'DRAFT'], target='REJECTED',
                permission="rp.can_change_status")
    def reject(self):
        """Manual rejection of the article."""

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def set_priority(self):
        """Set the boolean priority of an article to True.

        The article must be a _DRAFT_.
        """
        self.priority = True

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def unset_priority(self):
        """Set the boolean priority of an article to False.

        The article must be a _DRAFT_.
        """
        self.priority = False

    # The DRAFT -> DRAFT transition now declares the same "rp.can_vote"
    # permission as the NEW transition and as both downvote transitions.
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    @transition(field=status, source='NEW',
                target=RETURN_VALUE('NEW', 'DRAFT'), permission="rp.can_vote")
    def upvote(self):
        """Increase the article score by one.

        Returns the state the article should move to: ``'DRAFT'`` once the
        score reaches ``ARTICLE_SCORE_THRESHOLD``, otherwise the current
        status. The return value only drives the transition declared for
        _NEW_ articles; the _DRAFT_ transition has a fixed target.
        """
        self.score += 1
        if self.score >= ARTICLE_SCORE_THRESHOLD:
            return 'DRAFT'
        return self.status

    @transition(field=status, source='PUBLISHED', target='DRAFT',
                permission="rp.can_change_status")
    def unpublish(self):
        """Unpublish an article from the RP, and reset it back to a _DRAFT_."""

    @transition(field=status, source='NEW', target='NEW',
                permission="rp.can_vote")
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    def downvote(self, by=None):
        """Decrease the article score by one.

        Draft articles can be downvoted but will not be moved back in
        the _NEW_ queue.

        ``by`` is accepted for the caller's convenience but is not used by
        this method.
        """
        self.score -= 1

    @classmethod
    def add_new_url(cls, by=None, **data):
        """Manually add a new article from its URL.

        The URL is normalised with ``cleanup_url`` and looked up; a new
        article is created from the remaining ``data`` when none exists.
        The article is then upvoted, tagged with ``data['tags']`` if given,
        and the HTTP status of its URL is recorded in ``original_status``.

        ``by`` is accepted but not used by this method. (The method
        previously lacked the ``cls`` parameter, so the class itself was
        bound to ``by`` and passing ``by=`` raised ``TypeError``.)

        Returns the saved article, or ``None`` when the article had been
        rejected before.
        """
        import requests

        url = cleanup_url(data.pop('url', None))
        tags = data.pop('tags', None)
        (article, created) = cls.objects.get_or_create(url=url,
                                                       defaults=data)

        # Always upvote the article, except if it was rejected before.
        # It's either a new one, and adding it count as a vote, or it existed
        # already, and this is a vote.
        if article.status == "REJECTED":
            return None
        # NOTE(review): no upvote transition is declared from PUBLISHED, so
        # re-submitting an already published URL presumably fails here -
        # confirm whether callers rely on that.
        article.upvote()

        # Let's add the tags
        if tags:
            article.tags.add(*tags)
            article.save()

        try:
            r = requests.get(url, timeout=0.5)
            article.original_status = r.status_code
        except Exception:
            # If the domain name can't be found, we're not even getting into
            # the HTTP protocol So, let's get a specific status for that,
            # one that can be easily identified.
            article.original_status = 600

        article.url = url
        article.save()
        article.refresh_from_db()
        return article

    # Content extraction
    def _download_article(self, **download_kwargs):
        """Download the original page with the newspaper parser.

        Returns the downloaded (not yet parsed) parser object, or ``None``
        when ``original_status`` is an error code or the download failed.
        A failed download sets ``original_status`` to 400 and saves.
        """
        if self.original_status >= 400:
            return None

        if self.lang != "NA":
            article = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            article = ArticleParser(url=self.url)

        article.download(**download_kwargs)
        try:
            article.throw_if_not_downloaded_verbose()
        except ArticleException:
            self.original_status = 400
            self.save()
            return None
        return article

    def fetch_content(self):
        """Fill ``title`` and ``extracts`` from the original page and save.

        Does nothing when the page is known to be in error or cannot be
        downloaded.
        """
        # NOTE(review): upstream newspaper3k appears to take request_timeout
        # through the parser configuration rather than download() - confirm
        # the installed version accepts this keyword.
        article = self._download_article(request_timeout=1)
        if article is None:
            return

        article.parse()
        self.title = article.title
        self.extracts = article.text
        self.save()

    def fetch_metadata(self):
        """Store the page's Opengraph metadata as JSON and save.

        Does nothing when ``original_status`` is an error code.
        """
        import opengraph_py3 as og

        if self.original_status < 400:
            metadata = og.OpenGraph(url=self.url, )
            self.metadata = metadata.to_json()
            self.save()

    def fetch_image(self):
        """Download the page's main image into ``screenshot`` and save.

        Does nothing when the page is in error, cannot be downloaded, has
        no main image, the image request does not return 200, or the
        payload is not a recognised image format.
        """
        import requests
        # NOTE(review): imghdr is deprecated since Python 3.11 and removed
        # in 3.13; a replacement will be needed on newer interpreters.
        import imghdr

        article = self._download_article()
        if article is None:
            return

        article.parse()

        img_path = article.meta_img
        if not img_path:
            return

        resp = requests.get(img_path, stream=True)
        if resp.status_code != requests.codes.ok:
            return

        content = resp.content
        file_name_ext = imghdr.what(None, content)
        if file_name_ext is None:
            # Unknown format: saving would produce "screenshot-<id>.None".
            return

        self.screenshot.save(
            "screenshot-{0}.{1}".format(self.id, file_name_ext),
            files.File(BytesIO(content)), save=True)