models.py 8.52 KB
Newer Older
cynddl's avatar
cynddl committed
1
from django.db import models
cynddl's avatar
cynddl committed
2
from django.utils.translation import ugettext_lazy as _
3
from django.core import files
luxcem's avatar
luxcem committed
4

cynddl's avatar
cynddl committed
5
from taggit.managers import TaggableManager
6
from newspaper import Article as ArticleParser, ArticleException
luxcem's avatar
luxcem committed
7
from django_und.models import VoteMixin
8
from django_fsm import FSMField, transition, RETURN_VALUE
luxcem's avatar
luxcem committed
9

10
from io import BytesIO
11
from datetime import datetime
12

cynddl's avatar
cynddl committed
13
from rp.utils import cleanup_url
14

cynddl's avatar
cynddl committed
15

16
17
#: Score at or above which ``Article.upvote`` moves a NEW article to DRAFT.
ARTICLE_SCORE_THRESHOLD = 3

luxcem's avatar
luxcem committed
18
#: Workflow states of an article, as (stored value, translated label) pairs.
#: Used as the ``choices`` of ``Article.status``.
STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected")),
)

25
26
27
28
29
30
#: Languages an article can be tagged with, as (stored value, translated
#: label) pairs. "NA" is the catch-all used as the default of ``Article.lang``.
LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other")),
)

31
32
33
34
35
36
37
38
39
40
#: ``help_text`` of the ``Article.url`` field.
URL_HELP_TEXT = (
    "The URL should not contain any marketing tags. We\n"
    "automatically strip the most known tags."
)

#: ``help_text`` of the ``Article.title`` field.
TITLE_HELP_TEXT = (
    "Please remove non-necessary parts such as newspapers'\n"
    "names and leave only the article title."
)

#: ``help_text`` of the ``Article.extracts`` field.
EXTRACTS_HELP_TEXT = (
    "Please select short and helpful extracts from the\n"
    "article content. You should aim at around 500 characters. "
    "Use bracket ellipsis\n"
    "[…] to cut parts not required to understand the context."
)

luxcem's avatar
luxcem committed
41

luxcem's avatar
luxcem committed
42
class Article(VoteMixin):
cynddl's avatar
cynddl committed
43
    #: Logical state (eg. article submitted, published, or rejected)
    # This is unprotected because superuser should be able to change
    # the status from the django admin interface
    status = FSMField(default='NEW', choices=STATUS_CHOICES)

    #: Original URL
    url = models.URLField("URL", help_text=URL_HELP_TEXT)

    #: Language of the webpage
    lang = models.CharField(
        _("Language"), choices=LANG_CHOICES, default="NA", max_length=50)

    #: Plain-text Opengraph metadata
    metadata = models.TextField(
        _("Opengraph metadata"), blank=True, null=True)

    #: Screenshot or banner image for the original webpage
    screenshot = models.ImageField(
        _("Article screenshot"), blank=True, null=True)

    #: Article title
    title = models.CharField(
        _("Article title"), max_length=255, default="",
        help_text=TITLE_HELP_TEXT)

    #: Short name for the website (eg. "NY Times")
    website = models.CharField(_("Website"), max_length=255, default="")

    #: Short content extracts (eg. two to three paragraphs)
    extracts = models.TextField(
        _("Content extracts"), blank=True, null=True,
        help_text=EXTRACTS_HELP_TEXT)

    #: First submission date
    created_at = models.DateTimeField(_("Creation date"), auto_now_add=True)

    #: Name of the user who first submitted the article
    created_by = models.CharField(max_length=255, null=True)

    #: Last update date
    updated_at = models.DateTimeField(_("Last update"), auto_now=True)

    #: Published date
    published_at = models.DateTimeField(
        _("Publication date"), blank=True, null=True)

    #: Status code recorded when fetching the original URL. ``add_new_url``
    #: stores the HTTP status code, or 600 when the request itself failed;
    #: the ``fetch_*`` methods store 400 when the download fails.
    # The default must be an int, not the string "200": Django assigns the
    # default as-is to unsaved instances, and the ``fetch_*`` methods compare
    # this value with ``>= 400``, which raises TypeError on a str.
    original_status = models.IntegerField(_("Original status"), default=200)

    #: priority: True if the article has priority
    priority = models.BooleanField(default=False)

    #: List of short tags to describe the article (eg. "Privacy", "Copyright")
    tags = TaggableManager(blank=True)
97

98
99
100
    class Meta:
        #: By default, sort articles by published, updated, or created date
        #: (most recent first)
        ordering = ["-published_at", "-updated_at", "-created_at"]

        verbose_name = _("Article")
        verbose_name_plural = _("Articles")

        #: Custom permissions; the ``rp.can_*`` names used by the state
        #: transitions of the model refer to these entries
        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles"),
        )

luxcem's avatar
luxcem committed
112
    def __str__(self):
        """ Use the article title as its string representation. """
        title = self.title
        return title
dave's avatar
dave committed
115

116
    # Finite state logic
117

118
    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="rp.can_change_status")
    def publish(self):
        """ Publish a complete draft.

        Transition from _DRAFT_ to _PUBLISHED_, declared with the
        ``rp.can_change_status`` permission. Stamps ``published_at`` with the
        current time. This method does not call ``save()`` itself.
        """
        # Function-level import, like the other optional imports of this
        # module.
        from django.utils import timezone

        # timezone.now() is timezone-aware when USE_TZ is enabled and naive
        # otherwise, so it always matches what DateTimeField expects;
        # datetime.now() is always naive and triggers a warning with USE_TZ.
        self.published_at = timezone.now()

    @transition(
        field=status,
        source='NEW',
        target='DRAFT',
        permission="rp.can_change_status",
    )
    def recover(self):
        """ Promote a _NEW_ article straight to _DRAFT_.

        The state change is entirely carried out by the transition decorator;
        the method body has nothing else to do.
        """

    @transition(
        field=status,
        source=['NEW', 'DRAFT'],
        target='REJECTED',
        permission="rp.can_change_status",
    )
    def reject(self):
        """ Manually reject a _NEW_ or _DRAFT_ article.

        The state change is entirely carried out by the transition decorator;
        the method body has nothing else to do.
        """

    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_change_priority",
    )
    def set_priority(self):
        """ Flag a draft as having priority (``priority`` becomes True).

        Declared as a _DRAFT_ to _DRAFT_ transition, so the status itself is
        left untouched.
        """
        self.priority = True

    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_change_priority",
    )
    def unset_priority(self):
        """ Remove the priority flag of a draft (``priority`` becomes False).

        Declared as a _DRAFT_ to _DRAFT_ transition, so the status itself is
        left untouched.
        """
        self.priority = False
147
148
149

    # Both transitions carry the "rp.can_vote" permission, like the two
    # transitions of downvote(); the DRAFT one used to have none, which let
    # the permission check pass for any user on draft articles.
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    @transition(field=status, source='NEW',
                target=RETURN_VALUE('NEW', 'DRAFT'), permission="rp.can_vote")
    def upvote(self, by=None):
        """
        Upvote the article score for the given user and remove previous votes.
        If the score reaches the threshold ```ARTICLE_SCORE_THRESHOLD```,
        automatically moves the article from _NEW_ to _DRAFT_.

        The vote itself is delegated to the parent class. The returned value
        is the state the article should end up in: it selects the target of
        the _NEW_ transition (declared with ``RETURN_VALUE``), while the
        _DRAFT_ transition has a fixed _DRAFT_ target.
        """
        super(Article, self).upvote(by)

        # und_score is not defined in this class; presumably it is the vote
        # score maintained by VoteMixin.
        if self.und_score >= ARTICLE_SCORE_THRESHOLD:
            return 'DRAFT'
        return self.status

163
164
    @transition(
        field=status,
        source='NEW',
        target='NEW',
        permission="rp.can_vote",
    )
    @transition(
        field=status,
        source='DRAFT',
        target='DRAFT',
        permission="rp.can_vote",
    )
    def downvote(self, by=None):
        """
        Register a downvote from the given user, replacing their previous
        votes. The status never changes here: a _NEW_ article stays _NEW_ and
        a _DRAFT_ article stays _DRAFT_ (it is not sent back to the _NEW_
        queue).
        """
        super().downvote(by)
174

cynddl's avatar
cynddl committed
175
    @staticmethod
    def add_new_url(url, by=None):
        """ Manually add a new article from its URL.

        The URL is cleaned up with ``cleanup_url`` and the matching article
        is fetched, or created if it has not been submitted before. When a
        user is given, it is recorded as the creator if the article has none
        yet, and the article is upvoted on its behalf if its current state
        allows it. The URL is then requested once and the resulting status
        code is stored in ``original_status``; 600 is stored when the request
        itself fails.

        :param url: URL of the article to add.
        :param by: user submitting the article, or None for an anonymous
            submission (no creator recorded, no vote).
        :returns: the saved article.
        """
        import requests
        from django_fsm import can_proceed

        url = cleanup_url(url)
        # Not unpacked into `_`: that name is the translation alias of this
        # module.
        article, _created = Article.objects.get_or_create(url=url)

        if by is not None:
            # Only record a real submitter: str(None) would store the text
            # "None" and prevent any later submitter from being recorded.
            if article.created_by is None:
                article.created_by = str(by)

            # upvote() is only declared for NEW and DRAFT articles; calling
            # it on a published or rejected one raises TransitionNotAllowed,
            # so re-submitting such a URL must not vote.
            if can_proceed(article.upvote):
                article.upvote(by)

        try:
            response = requests.get(url, timeout=0.5)
        except (requests.RequestException, ValueError):
            # If the name can't be found, we're not even getting into the
            # HTTP protocol. So, let's get a specific status for that, one
            # that can be identified. ValueError covers URLs that cannot be
            # parsed or encoded at all.
            article.original_status = 600
        else:
            article.original_status = response.status_code

        article.save()
        return article

201
    # Content extraction
202

203
    def fetch_content(self):
        """ Download the article and store its title and text.

        Nothing is fetched when ``original_status`` is 400 or more. When the
        download fails, ``original_status`` is set to 400 and saved.
        Otherwise the parsed title and text are saved into ``title`` and
        ``extracts``.
        """
        # Only hint the parser about the language when it is known.
        parser_options = {"url": self.url}
        if self.lang != "NA":
            parser_options["language"] = self.lang.lower()
        parser = ArticleParser(**parser_options)

        if self.original_status >= 400:
            return

        parser.download(request_timeout=1)
        try:
            parser.throw_if_not_downloaded_verbose()
        except ArticleException:
            # Remember the failure so later fetches skip this article.
            self.original_status = 400
            self.save()
            return

        parser.parse()
        self.title = parser.title
        self.extracts = parser.text
        self.save()
224

225
226
227
    def fetch_metadata(self):
        """ Fetch the Opengraph metadata of the URL and save it.

        Nothing is done when ``original_status`` is 400 or more. Otherwise
        the result of ``to_json()`` on the Opengraph data is stored in
        ``metadata`` and the article is saved.
        """
        import opengraph_py3 as og

        if self.original_status >= 400:
            return

        self.metadata = og.OpenGraph(url=self.url).to_json()
        self.save()
232

233
234
235
236
237
238
239
240
241
    def fetch_image(self, timeout=5):
        """ Download the article's main image and save it as screenshot.

        Nothing is fetched when ``original_status`` is 400 or more. When the
        article download fails, ``original_status`` is set to 400 and saved.
        Otherwise the image referenced by the parsed article (``meta_img``)
        is downloaded and stored in ``screenshot``. Fetching the image is
        best effort: a missing image, a network error, a non-OK response or
        content that is not a recognised image leaves the article unchanged.

        :param timeout: timeout in seconds for the image request.
        """
        import logging
        import requests
        # NOTE: imghdr is deprecated since Python 3.11 and removed in 3.13.
        import imghdr

        if self.lang != "NA":
            article = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            article = ArticleParser(url=self.url)

        if self.original_status >= 400:
            return

        article.download()
        try:
            article.throw_if_not_downloaded_verbose()
        except ArticleException:
            self.original_status = 400
            self.save()
            return

        article.parse()

        img_path = article.meta_img
        if not img_path:
            return

        try:
            # Always use a timeout: without one an unresponsive image host
            # blocks the caller indefinitely. The body is read right away,
            # so there is no point in streaming the response.
            resp = requests.get(img_path, timeout=timeout)
        except (requests.RequestException, ValueError):
            # ValueError covers image URLs that cannot be parsed or encoded.
            logging.getLogger(__name__).warning(
                "Could not fetch image %s", img_path)
            return

        if resp.status_code != requests.codes.ok:
            return

        content = resp.content
        file_name_ext = imghdr.what(None, content)
        if file_name_ext is None:
            # Not a recognised image format: do not store a file that would
            # end up being named "screenshot-<id>.None".
            return

        self.screenshot.save(
            "screenshot-{0}.{1}".format(self.id, file_name_ext),
            files.File(BytesIO(content)), save=True)