models.py 6.03 KB
Newer Older
cynddl's avatar
cynddl committed
1
from datetime import datetime
from io import BytesIO
from tempfile import NamedTemporaryFile

from django.core.files.base import ContentFile
from django.db import models
from django.utils import timezone
from django.utils.translation import ugettext_lazy as _
from django_fsm import FSMField, transition, RETURN_VALUE
from django_und.models import VoteMixin
from newspaper import Article as ArticleParser
from taggit.managers import TaggableManager

from project.settings import env
from rp.utils import cleanup_url
16

cynddl's avatar
cynddl committed
17

18
19
# Vote score at which Article.upvote() promotes a NEW article to DRAFT.
ARTICLE_SCORE_THRESHOLD = 3

luxcem's avatar
luxcem committed
20
# Workflow states of an article, as (stored value, translated label) pairs.
# Used as the choices of the Article.status state-machine field.
STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected")),
)

27
28
29
30
31
32
# Languages an article can be tagged with, as (stored value, translated
# label) pairs. "NA" is the catch-all entry for any other language.
LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other")),
)

33
34
35
36
37
38
39
40
41
42
# Help text shown next to the Article.url field.
URL_HELP_TEXT = """The URL should not contain any marketing tags. We
automatically strip the most known tags."""

# Help text shown next to the Article.title field.
TITLE_HELP_TEXT = """Please remove non-necessary parts such as newspapers'
names and leave only the article title."""

# Help text shown next to the Article.extracts field.
EXTRACTS_HELP_TEXT = """Please select short and helpful extracts from the
article content. You should aim at around 500 characters. Use bracket ellipsis
[…] to cut parts not required to understand the context."""

luxcem's avatar
luxcem committed
43

luxcem's avatar
luxcem committed
44
class Article(VoteMixin):
    """An article referenced by URL that goes through a voting/review workflow.

    ``status`` is a django-fsm state field; the ``@transition``-decorated
    methods below are the only way to change it (the field is protected).
    Voting behaviour itself comes from ``VoteMixin``.
    """

    status = FSMField(default='NEW', choices=STATUS_CHOICES, protected=True)

    url = models.URLField("URL", help_text=URL_HELP_TEXT)
    lang = models.CharField(
        _("Language"), choices=LANG_CHOICES, default="NA", max_length=50)
    metadata = models.TextField(
        _("Opengraph metadata"), blank=True, null=True)
    screenshot = models.ImageField(
        _("Article screenshot"), blank=True, null=True)
    title = models.CharField(
        _("Article title"), max_length=255, default="",
        help_text=TITLE_HELP_TEXT)
    website = models.CharField(_("Website"), max_length=255, default="")
    extracts = models.TextField(
        _("Content extracts"), blank=True, null=True,
        help_text=EXTRACTS_HELP_TEXT)

    created_at = models.DateTimeField(_("Creation date"), auto_now_add=True)
    created_by = models.CharField(max_length=255, null=True)
    updated_at = models.DateTimeField(_("Last update"), auto_now=True)
    published_at = models.DateTimeField(
        _("Publication date"), blank=True, null=True)

    #: priority: True if the article has priority
    priority = models.BooleanField(default=False)

    tags = TaggableManager(blank=True)

    class Meta:
        verbose_name = _("Article")
        verbose_name_plural = _("Articles")
        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles")
        )

    def __str__(self):
        """Return the article title."""
        return self.title

    # Finite state logic

    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="rp.can_change_status")
    def publish(self):
        """Move a DRAFT article to PUBLISHED and stamp its publication date."""
        # timezone.now() is aware when USE_TZ is enabled and naive otherwise,
        # so it matches what the DateTimeField expects in either setup.
        self.published_at = timezone.now()

    @transition(field=status, source='NEW', target='DRAFT',
                permission="rp.can_change_status")
    def recover(self):
        """Move a NEW article to DRAFT without waiting for votes."""
        pass

    @transition(field=status, source=['NEW', 'DRAFT'], target='REJECTED',
                permission="rp.can_change_status")
    def reject(self):
        """Move a NEW or DRAFT article to REJECTED."""
        pass

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def set_priority(self):
        """Flag a DRAFT article as priority; the status stays DRAFT."""
        self.priority = True

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def unset_priority(self):
        """Clear the priority flag of a DRAFT article; the status stays DRAFT."""
        self.priority = False

    @transition(field=status, source='DRAFT', target='DRAFT')
    @transition(field=status, source='NEW',
                target=RETURN_VALUE('NEW', 'DRAFT'), permission="rp.can_vote")
    def upvote(self, by=None):
        """Record an upvote and promote a NEW article once its score is high enough.

        The vote is delegated to ``VoteMixin.upvote``. From NEW, the returned
        value selects the next state: 'DRAFT' when ``und_score`` has reached
        ``ARTICLE_SCORE_THRESHOLD``, otherwise the current status. From DRAFT
        the target is fixed, so the article stays DRAFT.
        """
        super(Article, self).upvote(by)
        if self.und_score >= ARTICLE_SCORE_THRESHOLD:
            return 'DRAFT'
        else:
            return self.status

    @transition(field=status, source='NEW', target='NEW', permission="rp.can_vote")
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    def downvote(self, by=None):
        """Record a downvote; the status (NEW or DRAFT) is left unchanged."""
        super(Article, self).downvote(by)

    @staticmethod
    def add_new_url(url, by=None):
        """Register ``url`` as an article, creating it if needed, and return it.

        The URL is normalised with ``cleanup_url`` before the lookup. When
        ``by`` is given it is recorded as the creator (if none is recorded
        yet) and an upvote is cast on its behalf. Note that ``upvote`` is a
        state transition declared only for NEW and DRAFT articles.

        Declared as a static method so it can be called on the class (as
        before) and also on an instance.
        """
        url = cleanup_url(url)
        # `_created` rather than `_`, which is the translation alias here.
        article, _created = Article.objects.get_or_create(url=url)

        if by is not None:
            # Only record a real submitter: str(None) would store the literal
            # text "None" in a column that is meant to be NULL when unknown.
            if article.created_by is None:
                article.created_by = str(by)
            article.upvote(by)

        article.save()
        return article

    # Content extraction

    def fetch_content(self):
        """Download the article and fill ``title`` and ``extracts`` from it.

        The page is fetched and parsed with newspaper's ``Article`` parser,
        then the instance is saved.
        """
        if self.lang != "NA":
            # Choices are stored upper-case ("FR", "EN"); the parser is given
            # the lower-case code.
            article = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            article = ArticleParser(url=self.url)

        article.download()
        article.parse()
        self.title = article.title
        self.extracts = article.text
        self.save()

    def fetch_screenshot(self):
        """Render the article in Firefox and store a thumbnail in ``screenshot``.

        The page is loaded inside a virtual display, captured as PNG, reduced
        to fit within 240x360 and saved on the ``screenshot`` field (which
        also saves the instance).
        """
        # Imported locally, so these packages are only required when a
        # screenshot is actually taken.
        from selenium import webdriver
        from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
        from PIL import Image
        from pyvirtualdisplay import Display

        with Display(visible=False, size=(3200, 1800)):
            binary = None
            if hasattr(env, "FIREFOX_BINARY_PATH"):
                binary = FirefoxBinary(env.FIREFOX_BINARY_PATH)

            profile = None
            if hasattr(env, "FIREFOX_PROFILE_PATH"):
                profile = webdriver.FirefoxProfile(env.FIREFOX_PROFILE_PATH)

            driver = webdriver.Firefox(profile, firefox_binary=binary)
            try:
                driver.set_window_size(1200, 1800)
                driver.get(self.url)
                screen = driver.get_screenshot_as_png()
            finally:
                # Always shut the browser down, even if loading the page or
                # taking the screenshot fails.
                driver.quit()

        im = Image.open(BytesIO(screen))
        im.thumbnail((240, 360))
        im_io = BytesIO()
        im.save(im_io, format="PNG")
        self.screenshot.save(
            "screenshot-%i" % self.id, ContentFile(im_io.getvalue()),
            save=True)