Commit 59056ba6 authored by Okhin's avatar Okhin
Browse files

Adding a status code for the original article and avoid unnecessary queries

parent 22703fed
Pipeline #2544 passed with stages
in 2 minutes and 53 seconds
# -*- coding: utf-8 -*-
# Generated by Django 1.11.2 on 2019-04-04 09:23
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``original_status`` integer field to the ``article`` model.

    Depends on migration ``0019_auto_20190320_1150`` of the ``rp`` app.
    """

    dependencies = [
        ('rp', '0019_auto_20190320_1150'),
    ]

    operations = [
        migrations.AddField(
            model_name='article',
            name='original_status',
            # NOTE(review): the default is the string '200', not the int 200.
            # Kept verbatim so the recorded migration state is unchanged;
            # presumably Django coerces it for the integer column -- confirm.
            field=models.IntegerField(
                default='200',
                verbose_name='Original status',
            ),
        ),
    ]
...@@ -3,7 +3,7 @@ from django.utils.translation import ugettext_lazy as _ ...@@ -3,7 +3,7 @@ from django.utils.translation import ugettext_lazy as _
from django.core import files from django.core import files
from taggit.managers import TaggableManager from taggit.managers import TaggableManager
from newspaper import Article as ArticleParser from newspaper import Article as ArticleParser, ArticleException
from django_und.models import VoteMixin from django_und.models import VoteMixin
from django_fsm import FSMField, transition, RETURN_VALUE from django_fsm import FSMField, transition, RETURN_VALUE
...@@ -86,6 +86,9 @@ class Article(VoteMixin): ...@@ -86,6 +86,9 @@ class Article(VoteMixin):
published_at = models.DateTimeField( published_at = models.DateTimeField(
_("Publication date"), blank=True, null=True) _("Publication date"), blank=True, null=True)
#: original status (status code returned when trying to fetch the data)
original_status = models.IntegerField(_("Original status"), default="200")
#: priority: True if article have priority #: priority: True if article have priority
priority = models.BooleanField(default=False) priority = models.BooleanField(default=False)
...@@ -174,6 +177,8 @@ class Article(VoteMixin): ...@@ -174,6 +177,8 @@ class Article(VoteMixin):
Verify if the article has not been submitted before and automatically Verify if the article has not been submitted before and automatically
upvote for the given user if applicable. upvote for the given user if applicable.
""" """
import requests
url = cleanup_url(url) url = cleanup_url(url)
article, _ = Article.objects.get_or_create(url=url) article, _ = Article.objects.get_or_create(url=url)
...@@ -183,6 +188,13 @@ class Article(VoteMixin): ...@@ -183,6 +188,13 @@ class Article(VoteMixin):
if by is not None: if by is not None:
article.upvote(by) article.upvote(by)
try:
r = requests.get(url, timeout=0.5)
article.original_status = r.status_code
except:
# If the name can't be found, we're not even getting into the HTTP protocol
# So, let's get a specific status for that, one that can be identified.
article.original_status = 600
article.save() article.save()
return article return article
...@@ -194,7 +206,17 @@ class Article(VoteMixin): ...@@ -194,7 +206,17 @@ class Article(VoteMixin):
else: else:
article = ArticleParser(url=self.url) article = ArticleParser(url=self.url)
article.download() if self.original_status >= 400:
return
article.download(request_timeout=1)
try:
article.throw_if_not_downloaded_verbose()
except ArticleException:
self.original_status = 400
self.save()
return
article.parse() article.parse()
self.title = article.title self.title = article.title
self.extracts = article.text self.extracts = article.text
...@@ -203,17 +225,10 @@ class Article(VoteMixin): ...@@ -203,17 +225,10 @@ class Article(VoteMixin):
def fetch_metadata(self): def fetch_metadata(self):
import opengraph_py3 as og import opengraph_py3 as og
if self.lang != "NA": if self.original_status < 400:
article = ArticleParser(url=self.url, language=self.lang.lower()) metadata = og.OpenGraph(url=self.url, )
else: self.metadata = metadata.to_json()
article = ArticleParser(url=self.url) self.save()
try:
metadata = og.OpenGraph(url=self.url)
article.metadata = metadata.to_json()
article.save()
except Exception:
pass
def fetch_image(self): def fetch_image(self):
import requests import requests
...@@ -224,7 +239,17 @@ class Article(VoteMixin): ...@@ -224,7 +239,17 @@ class Article(VoteMixin):
else: else:
article = ArticleParser(url=self.url) article = ArticleParser(url=self.url)
if self.original_status >= 400:
return
article.download() article.download()
try:
article.throw_if_not_downloaded_verbose()
except ArticleException:
self.original_status = 400
self.save()
return
article.parse() article.parse()
img_path = article.meta_img img_path = article.meta_img
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment