"""Article model and its workflow.

Defines ``Article``: a web page identified by its URL, carrying a
finite-state ``status`` (NEW / DRAFT / PUBLISHED / REJECTED), a vote score,
a few boolean flags, and helpers that fetch the page content, its Opengraph
metadata and its banner image.
"""
from datetime import datetime  # noqa: F401 (kept: existing file-level import)
from io import BytesIO

from django.core import files
from django.db import models
from django.utils import timezone
from django.utils.translation import ugettext_lazy as _
from django_fsm import FSMField, transition, RETURN_VALUE
from newspaper import Article as ArticleParser, ArticleException
from taggit.managers import TaggableManager

from rp.utils import cleanup_url

#: Score at which ``Article.upvote`` reports ``DRAFT`` as the next status.
ARTICLE_SCORE_THRESHOLD = 3

STATUS_CHOICES = (
    ("NEW", _("New")),
    ("DRAFT", _("Draft")),
    ("PUBLISHED", _("Published")),
    ("REJECTED", _("Rejected"))
)

LANG_CHOICES = (
    ("FR", _("French")),
    ("EN", _("English")),
    ("NA", _("Other"))
)

URL_HELP_TEXT = (
    "The URL should not contain any marketing tags. "
    "We automatically strip the most known tags."
)

TITLE_HELP_TEXT = (
    "Please remove non-necessary parts such as newspapers' names "
    "and leave only the article title."
)

EXTRACTS_HELP_TEXT = (
    "Please select short and helpful extracts from the article content. "
    "You should aim at around 500 characters. "
    "Use bracket ellipsis […] to cut parts not required to understand "
    "the context."
)


class Article(models.Model):
    """A web article tracked by URL, with an FSM-driven ``status``."""

    #: Logical state (eg. article submitted, published, or rejected)
    # This is unprotected because superuser should be able to change
    # the status from the django admin interface
    status = FSMField(default='NEW', choices=STATUS_CHOICES)
    #: Original URL
    url = models.URLField("URL", help_text=URL_HELP_TEXT)
    #: Language of the webpage
    lang = models.CharField(
        _("Language"),
        choices=LANG_CHOICES,
        default="NA",
        max_length=50)
    #: Plain-text Opengraph metadata
    metadata = models.TextField(
        _("Opengraph metadata"), blank=True, null=True)
    #: Screenshot or banner image for the original webpage
    screenshot = models.ImageField(
        _("Article screenshot"), blank=True, null=True)
    #: Article title
    title = models.CharField(
        _("Article title"),
        max_length=255,
        default="",
        help_text=TITLE_HELP_TEXT)
    #: Short name for the website (eg. "NY Times")
    website = models.CharField(_("Website"), max_length=255, default="")
    #: Short content extracts (eg. two to three paragraphs)
    extracts = models.TextField(
        _("Content extracts"),
        blank=True,
        null=True,
        help_text=EXTRACTS_HELP_TEXT)
    #: First submission date
    created_at = models.DateTimeField(_("Creation date"), auto_now_add=True)
    #: Name of the user who first submitted the article
    created_by = models.CharField(max_length=255, null=True)
    #: Last update date
    updated_at = models.DateTimeField(_("Last update"), auto_now=True)
    #: Published date
    published_at = models.DateTimeField(
        _("Publication date"), blank=True, null=True)
    #: HTTP status code obtained when the URL was fetched; ``add_new_url``
    #: stores 600 when the request itself failed. The default is an integer
    #: so that unsaved instances can be compared with ``>= 400``.
    original_status = models.IntegerField(_("Original status"), default=200)
    #: priority: True if the article has priority
    priority = models.BooleanField(default=False)
    #: List of tags used to add subject and topics to an article
    tags = TaggableManager(blank=True)
    #: Score of the article, modified by the upvote and downvote methods
    score = models.IntegerField(default=0)
    #: If the publication is "archived" (not visible by default from the
    #: public feeds), this flag is set to True
    archive = models.BooleanField(_("Article archived"), default=False)
    #: If the article is quoting something LQDN said or wrote
    quote = models.BooleanField(_("Article directly quotes us"), default=False)
    #: If the article speaks about something LQDN did or wrote
    speak = models.BooleanField(_("Article speaks of us"), default=False)

    class Meta:
        verbose_name = _("Article")
        verbose_name_plural = _("Articles")
        permissions = (
            ("can_change_status", "Can change article status"),
            ("can_change_priority", "Can change article priority"),
            ("can_vote", "Can vote articles"),
            ("can_edit", "Can edit articles")
        )
        #: By default, sort articles by published, updated, or created date
        ordering = ["-published_at", "-updated_at", "-created_at"]

    def __str__(self):
        """Return the article title."""
        return self.title

    # Flags logic

    def toggle_speak(self):
        """Invert the ``speak`` flag and save the article."""
        self.speak = not self.speak
        self.save()

    def toggle_archive(self):
        """Invert the ``archive`` flag and save the article."""
        self.archive = not self.archive
        self.save()

    def toggle_quote(self):
        """Invert the ``quote`` flag and save the article."""
        self.quote = not self.quote
        self.save()

    @transition(field=status, source=['DRAFT', 'NEW', 'PUBLISHED'],
                target=RETURN_VALUE('DRAFT', 'NEW', 'PUBLISHED',),
                permission="rp.can_edit")
    def set_flags(self, archive=False, speak=False, quote=False):
        """
        Set _all_ the flags to the values given as arguments and save.

        Every argument defaults to False, so calling this method without
        arguments unsets all flags. The current status is returned, so the
        transition leaves the status unchanged.
        """
        self.archive = archive
        self.speak = speak
        self.quote = quote
        self.save()
        return self.status

    # Finite state logic

    @transition(field=status, source='DRAFT', target='PUBLISHED',
                permission="rp.can_change_status")
    def publish(self):
        """
        Publish a complete draft and record the publication date.

        The caller is responsible for saving the article afterwards.
        """
        # timezone.now() is aware or naive according to settings.USE_TZ,
        # which is what a DateTimeField expects.
        self.published_at = timezone.now()

    @transition(field=status, source=['NEW', 'REJECTED'], target='DRAFT',
                permission="rp.can_change_status")
    def recover(self):
        """Force an article to be considered as _DRAFT_."""

    @transition(field=status, source=['NEW', 'DRAFT'], target='REJECTED',
                permission="rp.can_change_status")
    def reject(self):
        """Manual rejection of the article."""

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def set_priority(self):
        """
        Set the boolean priority of an article to True.
        The article must be a _DRAFT_.
        """
        self.priority = True

    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_change_priority")
    def unset_priority(self):
        """
        Set the boolean priority of an article to False.
        The article must be a _DRAFT_.
        """
        self.priority = False

    # NOTE(review): unlike ``downvote``, the DRAFT -> DRAFT transition below
    # declares no ``permission``; confirm whether that is intentional.
    @transition(field=status, source='DRAFT', target='DRAFT')
    @transition(field=status, source='NEW',
                target=RETURN_VALUE('NEW', 'DRAFT'),
                permission="rp.can_vote")
    def upvote(self):
        """
        Increment the article score by one.

        Returns 'DRAFT' once the score reaches ``ARTICLE_SCORE_THRESHOLD``
        and the current status otherwise; for a _NEW_ article this return
        value selects the next status. The article is not saved here.
        """
        self.score += 1
        if self.score >= ARTICLE_SCORE_THRESHOLD:
            return 'DRAFT'
        return self.status

    @transition(field=status, source='NEW', target='NEW',
                permission="rp.can_vote")
    @transition(field=status, source='DRAFT', target='DRAFT',
                permission="rp.can_vote")
    def downvote(self, by=None):
        """
        Decrement the article score by one.

        Draft articles can be downvoted but will not be moved back in the
        _NEW_ queue. ``by`` is accepted but not used by this method. The
        article is not saved here.
        """
        self.score -= 1

    @classmethod
    def add_new_url(cls, by=None, **data):
        """
        Manually add a new article from its URL.

        The URL is passed through ``cleanup_url`` and the article is fetched
        or created from it; the remaining ``data`` items are the defaults
        used on creation. ``data`` may also contain ``tags``, an iterable of
        tags to set on the article. ``by`` is accepted but not used by this
        method.

        Returns the saved article, or None if the article was previously
        rejected.
        """
        import requests

        url = cleanup_url(data.pop('url', None))
        tags = data.pop('tags', None)

        article, _created = cls.objects.get_or_create(url=url, defaults=data)

        # Always upvote the article, except if it was rejected before.
        # It's either a new one, and adding it counts as a vote, or it
        # existed already, and this is a vote.
        if article.status == "REJECTED":
            return None
        article.upvote()

        # Let's add the tags
        if tags:
            article.tags.set(*tags)
        article.save()

        try:
            r = requests.get(url, timeout=0.5)
            article.original_status = r.status_code
        except Exception:
            # Deliberately broad (best effort): if the domain name can't be
            # found, we're not even getting into the HTTP protocol. So,
            # let's use a specific status for that, one that can be easily
            # identified.
            article.original_status = 600

        article.url = url
        article.save()
        return article

    # Content extraction

    def _download_and_parse(self, **download_kwargs):
        """
        Download and parse the page at ``self.url`` with newspaper.

        ``download_kwargs`` are forwarded to the parser's ``download`` call.

        Returns the parsed newspaper article, or None when
        ``original_status`` is 400 or more, or when the download failed (in
        which case ``original_status`` is set to 400 and the article saved).
        """
        if self.original_status >= 400:
            return None

        if self.lang != "NA":
            parser = ArticleParser(url=self.url, language=self.lang.lower())
        else:
            parser = ArticleParser(url=self.url)

        parser.download(**download_kwargs)
        try:
            parser.throw_if_not_downloaded_verbose()
        except ArticleException:
            self.original_status = 400
            self.save()
            return None

        parser.parse()
        return parser

    def fetch_content(self):
        """
        Fill ``title`` and ``extracts`` from the downloaded page and save.

        Does nothing when the page cannot be downloaded.
        """
        parsed = self._download_and_parse(request_timeout=1)
        if parsed is None:
            return
        self.title = parsed.title
        self.extracts = parsed.text
        self.save()

    def fetch_metadata(self):
        """
        Store the page's Opengraph metadata as JSON in ``metadata`` and save.

        Does nothing when ``original_status`` is 400 or more.
        """
        import opengraph_py3 as og

        if self.original_status >= 400:
            return
        metadata = og.OpenGraph(url=self.url, )
        self.metadata = metadata.to_json()
        self.save()

    def fetch_image(self):
        """
        Download the page's meta image and store it in ``screenshot``.

        Does nothing when the page cannot be downloaded, when it declares no
        meta image, or when the image request does not answer with an OK
        status.
        """
        import imghdr
        import requests

        parsed = self._download_and_parse()
        if parsed is None:
            return

        img_path = parsed.meta_img
        if not img_path:
            return

        resp = requests.get(img_path, stream=True)
        if resp.status_code != requests.codes.ok:
            return

        payload = resp.content
        # NOTE(review): imghdr.what() returns None for payloads it does not
        # recognise, which yields a ".None" extension; imghdr is also
        # deprecated in recent Python versions. Confirm the wanted fallback.
        file_name_ext = imghdr.what(None, payload)
        # BytesIO(payload) leaves the cursor at the start of the stream, so
        # the storage backend reads the whole image.
        self.screenshot.save(
            "screenshot-{0}.{1}".format(self.id, file_name_ext),
            files.File(BytesIO(payload)),
            save=True)