Commit cc6c394a authored by dave

add article parsing

parent 0129c98c
from django.db import models
from django.utils.translation import ugettext_lazy as _
from taggit.managers import TaggableManager
from apps.rp.utils.screenshot import getScreenshot
from .vote import UnDVotedMixin
from newspaper import Article
class Article(UnDVotedMixin):
url = models.URLField("URL")
......@@ -25,3 +26,12 @@ class Article(UnDVotedMixin):
def __str__(self):
    """Return the article's title as its string representation."""
    label = self.title
    return label
def parse(self):
    """Fetch the page at ``self.url`` and fill in the title and text.

    Downloads and parses the page with the ``newspaper`` library, then
    copies the extracted title into ``self.title`` and the extracted body
    text into ``self.extracts``. The instance is only mutated in memory;
    this method does not call ``save()``.

    Any exception raised by ``newspaper`` while downloading or parsing
    propagates to the caller.
    """
    # The module-level ``from newspaper import Article`` is rebound by this
    # model class, which is also named ``Article``. Import the library class
    # under an alias here so the call below does not construct the Django
    # model by mistake.
    from newspaper import Article as NewspaperArticle

    # NOTE(review): ``self.lang`` is not defined in the visible part of the
    # class -- presumably a language-code field; confirm against the model.
    fetched = NewspaperArticle(url=self.url, language=self.lang)
    fetched.download()
    fetched.parse()

    self.title = fetched.title
    self.extracts = fetched.text
    # TODO: capture a screenshot, e.g. self.screenshot = getScreenshot(self.url)
    # TODO: save metadata
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment