models.py 8.64 KB
Newer Older
cynddl's avatar
cynddl committed
1
from datetime import datetime
from io import BytesIO

from django.core import files
from django.db import models
from django.utils import timezone
from django.utils.translation import ugettext_lazy as _
from django_fsm import FSMField, transition, RETURN_VALUE
from django_und.models import VoteMixin
from newspaper import Article as ArticleParser, ArticleException
from taggit.managers import TaggableManager

from rp.utils import cleanup_url
14

cynddl's avatar
cynddl committed
15

16 17
# Score an article must reach for ``Article.upvote`` to report 'DRAFT' as the
# next state of a NEW article.
ARTICLE_SCORE_THRESHOLD = 3

# (stored value, translated label) pairs for ``Article.status``.
STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected")),
)

# (stored value, translated label) pairs for ``Article.lang``; "NA" is the
# model's default.
LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other")),
)

31 32 33 34 35 36 37 38 39 40
# Help texts shown next to the corresponding Article form fields. The line
# breaks are part of the strings, hence the explicit "\n".
URL_HELP_TEXT = (
    "The URL should not contain any marketing tags. We\n"
    "automatically strip the most known tags."
)

TITLE_HELP_TEXT = (
    "Please remove non-necessary parts such as newspapers'\n"
    "names and leave only the article title."
)

EXTRACTS_HELP_TEXT = (
    "Please select short and helpful extracts from the\n"
    "article content. You should aim at around 500 characters. "
    "Use bracket ellipsis\n"
    "[…] to cut parts not required to understand the context."
)

luxcem's avatar
luxcem committed
41
class Article(VoteMixin):
    """
    An article identified by its URL, with a voting and publication workflow.

    The ``status`` field is a finite-state field: articles start as ``NEW``,
    can become ``DRAFT`` (manually or once enough upvotes are collected),
    and a draft can then be ``PUBLISHED``. ``NEW`` and ``DRAFT`` articles can
    also be ``REJECTED``. The ``@transition`` methods below implement these
    moves; the ``fetch_*`` methods fill in content from the original page.
    """

    #: Logical state (eg. article submitted, published, or rejected)
    # This is unprotected because superuser should be able to change
    # the status from the django admin interface
    status = FSMField(default='NEW', choices=STATUS_CHOICES)

    #: Original URL
    url = models.URLField("URL", help_text=URL_HELP_TEXT)

    #: Language of the webpage
    lang = models.CharField(
        _("Language"), choices=LANG_CHOICES, default="NA", max_length=50)

    #: Plain-text Opengraph metadata
    metadata = models.TextField(
        _("Opengraph metadata"), blank=True, null=True)

    #: Screenshot or banner image for the original webpage
    screenshot = models.ImageField(
        _("Article screenshot"), blank=True, null=True)

    #: Article title
    title = models.CharField(
        _("Article title"), max_length=255, default="",
        help_text=TITLE_HELP_TEXT)

    #: Short name for the website (eg. "NY Times")
    website = models.CharField(_("Website"), max_length=255, default="")

    #: Short content extracts (eg. two to three paragraphs)
    extracts = models.TextField(
        _("Content extracts"), blank=True, null=True,
        help_text=EXTRACTS_HELP_TEXT)

    #: First submission date
    created_at = models.DateTimeField(_("Creation date"), auto_now_add=True)

    #: Name of the user who first submitted the article
    created_by = models.CharField(max_length=255, null=True)

    #: Last update date
    updated_at = models.DateTimeField(_("Last update"), auto_now=True)

    #: Published date
    published_at = models.DateTimeField(
        _("Publication date"), blank=True, null=True)

    #: Original state: HTTP status code obtained when fetching the URL
    #: (600 when the request itself failed, see ``add_new_url``)
    # The default is an integer so that comparisons such as
    # ``original_status >= 400`` also work on instances not yet reloaded
    # from the database.
    original_status = models.IntegerField(_("Original status"), default=200)

    #: priority: True if the article has priority
    priority = models.BooleanField(default=False)

    #: List of short tags to describe the article (eg. "Privacy", "Copyright")
    tags = TaggableManager(blank=True)

    class Meta:
        verbose_name = _("Article")
        verbose_name_plural = _("Articles")

        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles")
        )

        #: By default, sort articles by published, updated, or created date
        ordering = ["-published_at", "-updated_at", "-created_at"]

    def __str__(self):
        """ Returns article title. """
        return self.title

    # Finite state logic

    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="rp.can_change_status")
    def publish(self):
        """ Publish a complete draft and record the publication date. """
        # timezone.now() follows the project's USE_TZ setting (aware when it
        # is enabled, naive otherwise), unlike a bare datetime.now().
        self.published_at = timezone.now()

    @transition(field=status, source='NEW', target='DRAFT',
                permission="rp.can_change_status")
    def recover(self):
        """ Force an article to be considered as a draft. """

    @transition(field=status, source=['NEW', 'DRAFT'], target='REJECTED',
                permission="rp.can_change_status")
    def reject(self):
        """ Manual rejection of the article. """

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def set_priority(self):
        """ Set the boolean priority of an article to True. """
        self.priority = True

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def unset_priority(self):
        """ Set the boolean priority of an article to False. """
        self.priority = False

    # NOTE(review): the DRAFT -> DRAFT transition below carries no
    # "rp.can_vote" permission, unlike the matching one on ``downvote`` --
    # confirm this is intended.
    @transition(field=status, source='DRAFT', target='DRAFT')
    @transition(field=status, source='NEW',
                target=RETURN_VALUE('NEW', 'DRAFT'), permission="rp.can_vote")
    def upvote(self, by=None):
        """
        Upvote the article score for the given user and remove previous votes.
        If the score crosses the threshold ```ARTICLE_SCORE_THRESHOLD```,
        automatically moves the article from _NEW_ to _DRAFT_.

        Returns the state the article should be in after the vote: 'DRAFT'
        once the threshold is reached, the current status otherwise.
        """
        super(Article, self).upvote(by)
        if self.und_score >= ARTICLE_SCORE_THRESHOLD:
            return 'DRAFT'
        else:
            return self.status

    @transition(field=status, source='NEW', target='NEW',
                permission="rp.can_vote")
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    def downvote(self, by=None):
        """
        Downvote the article score for the given user and remove previous
        votes. Draft articles can be downvoted but will not be moved back in
        the _NEW_ queue.
        """
        super(Article, self).downvote(by)

    @classmethod
    def add_new_url(cls, by=None, **data):
        """ Manually add a new article from its URL.
        Verify if the article has not been submitted before and automatically
        upvote for the given user if applicable.

        ``by`` is the submitting user; its string form is passed to
        ``upvote`` when the article already exists. ``data`` holds the
        article fields: ``url`` is taken out and passed through
        ``cleanup_url``, the remaining items are only used as defaults when
        the article is created. Returns the saved article.
        """
        import requests

        url = cleanup_url(data.pop('url', None))
        article, created = cls.objects.get_or_create(url=url, defaults=data)

        # If the article was already there, we should upvote it
        if not created:
            article.upvote(str(by))

        try:
            r = requests.get(url, timeout=0.5)
        except requests.RequestException:
            # If the domain name can't be found, we're not even getting into
            # the HTTP protocol. So, let's get a specific status for that,
            # one that can be easily identified.
            article.original_status = 600
        else:
            article.original_status = r.status_code
        article.save()
        return article

    # Content extraction

    def fetch_content(self):
        """
        Download and parse the page, then store its title and text.

        Does nothing when ``original_status`` is 400 or above. When the
        download fails, ``original_status`` is set to 400 and saved instead.
        """
        if self.lang != "NA":
            article = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            article = ArticleParser(url=self.url)

        if self.original_status >= 400:
            return

        article.download(request_timeout=1)
        try:
            article.throw_if_not_downloaded_verbose()
        except ArticleException:
            self.original_status = 400
            self.save()
            return

        article.parse()
        self.title = article.title
        self.extracts = article.text
        self.save()

    def fetch_metadata(self):
        """
        Store the page's Opengraph metadata as JSON in ``metadata``.

        Does nothing when ``original_status`` is 400 or above.
        """
        import opengraph_py3 as og

        if self.original_status < 400:
            metadata = og.OpenGraph(url=self.url)
            self.metadata = metadata.to_json()
            self.save()

    def fetch_image(self):
        """
        Download the page's main image and store it as ``screenshot``.

        Does nothing when ``original_status`` is 400 or above, when the page
        advertises no image, when the image request does not return an OK
        status, or when the payload is not a recognised image format. When
        the page download fails, ``original_status`` is set to 400 and saved.
        """
        import requests
        # NOTE: imghdr is deprecated since Python 3.11 and removed in 3.13.
        import imghdr

        if self.lang != "NA":
            article = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            article = ArticleParser(url=self.url)

        if self.original_status >= 400:
            return

        article.download()
        try:
            article.throw_if_not_downloaded_verbose()
        except ArticleException:
            self.original_status = 400
            self.save()
            return

        article.parse()

        img_path = article.meta_img
        if not img_path:
            return

        resp = requests.get(img_path, stream=True)
        if resp.status_code != requests.codes.ok:
            return

        content = resp.content
        file_name_ext = imghdr.what(None, content)
        if file_name_ext is None:
            # Unrecognised payload: storing it would produce a file named
            # "screenshot-<id>.None" holding something that is not an image.
            return

        self.screenshot.save(
            "screenshot-{0}.{1}".format(self.id, file_name_ext),
            files.File(BytesIO(content)), save=True)