Commit 59056ba6 authored by okhin's avatar okhin 🚴

Add a status code for the original article and avoid unnecessary queries

parent 22703fed
Pipeline #2544 passed with stages
in 2 minutes and 53 seconds
# -*- coding: utf-8 -*-
# Generated by Django 1.11.2 on 2019-04-04 09:23
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``original_status`` integer field to the ``article`` model."""

    # Must run after the previous migration of the ``rp`` app.
    dependencies = [
        ('rp', '0019_auto_20190320_1150'),
    ]

    operations = [
        migrations.AddField(
            model_name='article',
            name='original_status',
            # The default is an integer, matching the field type; a string
            # default would be handed uncoerced to freshly built instances.
            field=models.IntegerField(
                default=200, verbose_name='Original status'),
        ),
    ]
......@@ -3,7 +3,7 @@ from django.utils.translation import ugettext_lazy as _
from django.core import files
from taggit.managers import TaggableManager
from newspaper import Article as ArticleParser
from newspaper import Article as ArticleParser, ArticleException
from django_und.models import VoteMixin
from django_fsm import FSMField, transition, RETURN_VALUE
......@@ -86,6 +86,9 @@ class Article(VoteMixin):
published_at = models.DateTimeField(
_("Publication date"), blank=True, null=True)
#: original status (status code obtained when trying to fetch the data)
# Integer default: a string default would leave unsaved instances holding a
# str, which cannot be compared against integer status thresholds.
original_status = models.IntegerField(_("Original status"), default=200)
#: priority: True if the article has priority
priority = models.BooleanField(default=False)
......@@ -174,6 +177,8 @@ class Article(VoteMixin):
Verify if the article has not been submitted before and automatically
upvote for the given user if applicable.
"""
import requests
url = cleanup_url(url)
article, _ = Article.objects.get_or_create(url=url)
......@@ -183,6 +188,13 @@ class Article(VoteMixin):
if by is not None:
article.upvote(by)
try:
r = requests.get(url, timeout=0.5)
article.original_status = r.status_code
except:
# The request failed before any HTTP response was received (for example,
# the host name could not be resolved), so record a specific, easily
# identifiable status that lies outside the range of real HTTP codes.
article.original_status = 600
article.save()
return article
......@@ -194,7 +206,17 @@ class Article(VoteMixin):
else:
article = ArticleParser(url=self.url)
article.download()
if self.original_status >= 400:
return
article.download(request_timeout=1)
try:
article.throw_if_not_downloaded_verbose()
except ArticleException:
self.original_status = 400
self.save()
return
article.parse()
self.title = article.title
self.extracts = article.text
......@@ -203,17 +225,10 @@ class Article(VoteMixin):
def fetch_metadata(self):
    """Fetch the OpenGraph metadata of ``self.url`` and store it on the article.

    Nothing is fetched when ``self.original_status`` is 400 or above, so no
    request is made for an article whose recorded status marks a failure.
    Otherwise the JSON form of the OpenGraph data is assigned to
    ``self.metadata`` and the article is saved.

    Errors raised by the OpenGraph fetch or by ``save()`` propagate to the
    caller.
    """
    if self.original_status >= 400:
        return

    # Imported lazily, and only once a fetch is actually going to happen.
    import opengraph_py3 as og

    # Store the result on the model itself, with a single request per call.
    metadata = og.OpenGraph(url=self.url)
    self.metadata = metadata.to_json()
    self.save()
def fetch_image(self):
import requests
......@@ -224,7 +239,17 @@ class Article(VoteMixin):
else:
article = ArticleParser(url=self.url)
if self.original_status >= 400:
return
article.download()
try:
article.throw_if_not_downloaded_verbose()
except ArticleException:
self.original_status = 400
self.save()
return
article.parse()
img_path = article.meta_img
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment