Commit 22703fed authored by okhin 🚴

Merge branch '8-backend-parsing-contenu-metadonnees-images' into 'rp2'

Resolve "Backend : parsing contenu/métadonnées/images"

See merge request !26
parents 060aac81 4ca3bc56
Pipeline #2543 passed with stages
in 3 minutes and 2 seconds
...@@ -41,13 +41,15 @@ class Command(BaseCommand): ...@@ -41,13 +41,15 @@ class Command(BaseCommand):
# First, let's get the data from presse table # First, let's get the data from presse table
presse = c.execute("SELECT * FROM presse") presse = c.execute("SELECT * FROM presse")
print("Importing 0/{} from previous database".format(presse)) print("Importing 0/{} from previous database\r".format(presse))
# And here we go # And here we go
done = 0 done = 0
errors = 0
for item in c.fetchall(): for item in c.fetchall():
done += 1 done += 1
print("Importing {}/{} from previous database".format(done, print("Importing {}/{} from previous database - {} errors\r".format(done,
presse)) presse,
errors))
c.execute("SELECT nid, vid FROM node WHERE nid=%s", (item['nid'],)) c.execute("SELECT nid, vid FROM node WHERE nid=%s", (item['nid'],))
node = c.fetchone() node = c.fetchone()
if node is None: if node is None:
...@@ -57,6 +59,7 @@ class Command(BaseCommand): ...@@ -57,6 +59,7 @@ class Command(BaseCommand):
(node['vid'],)) (node['vid'],))
revision = c.fetchone() revision = c.fetchone()
if revision is None: if revision is None:
errors += 1
continue continue
# Récupérons l'article si il existe en base # Récupérons l'article si il existe en base
...@@ -85,8 +88,9 @@ class Command(BaseCommand): ...@@ -85,8 +88,9 @@ class Command(BaseCommand):
try: try:
article.fetch_content() article.fetch_content()
article.fetch_image() article.fetch_image()
article.fetch_metadata()
except Exception: except Exception:
pass errors += 1
if article.status not in ("DRAFT", "PUBLISHED", ): if article.status not in ("DRAFT", "PUBLISHED", ):
article.recover() article.recover()
......
...@@ -6,7 +6,6 @@ from taggit.managers import TaggableManager ...@@ -6,7 +6,6 @@ from taggit.managers import TaggableManager
from newspaper import Article as ArticleParser from newspaper import Article as ArticleParser
from django_und.models import VoteMixin from django_und.models import VoteMixin
from django_fsm import FSMField, transition, RETURN_VALUE from django_fsm import FSMField, transition, RETURN_VALUE
import opengraph_py3 as og
from io import BytesIO from io import BytesIO
from datetime import datetime from datetime import datetime
...@@ -184,8 +183,6 @@ class Article(VoteMixin): ...@@ -184,8 +183,6 @@ class Article(VoteMixin):
if by is not None: if by is not None:
article.upvote(by) article.upvote(by)
# Let's get metadata import
article.metadata = og.OpenGraph(url=url)
article.save() article.save()
return article return article
...@@ -203,6 +200,21 @@ class Article(VoteMixin): ...@@ -203,6 +200,21 @@ class Article(VoteMixin):
self.extracts = article.text self.extracts = article.text
self.save() self.save()
def fetch_metadata(self):
    """Fetch the OpenGraph metadata of ``self.url`` and store it on the article.

    The metadata is serialized with ``OpenGraph.to_json()`` and assigned to
    ``self.metadata``; the article is then saved.

    Fetching is best-effort: if retrieving or serializing the metadata
    fails, the failure is logged and the article is left untouched.
    """
    # Imported locally, like the other fetch_* helpers do for their
    # optional dependencies.
    import logging
    import opengraph_py3 as og

    try:
        metadata = og.OpenGraph(url=self.url).to_json()
    except Exception:
        # Remote pages are unreliable; record the failure rather than
        # silently discarding it, but do not abort the caller.
        logging.getLogger(__name__).exception(
            "Could not fetch OpenGraph metadata for %s", self.url)
    else:
        # Store the result on the model instance itself: the previous code
        # assigned it to a throw-away ArticleParser object and called
        # save() on that parser, so nothing was ever persisted.
        self.metadata = metadata
        self.save()
def fetch_image(self): def fetch_image(self):
import requests import requests
import imghdr import imghdr
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
<a class="btn btn-outline-primary" href="?fetch_content">Fetch content</a> <a class="btn btn-outline-primary" href="?fetch_content">Fetch content</a>
<a class="btn btn-outline-primary" href="?fetch_image">Fetch image</a> <a class="btn btn-outline-primary" href="?fetch_image">Fetch image</a>
<a class="btn btn-outline-primary" href="?fetch_metadata">Fetch metadata</a>
</p> </p>
<div class="ml-auto"> <div class="ml-auto">
<span>Save and&nbsp;</span> <span>Save and&nbsp;</span>
......
...@@ -102,6 +102,8 @@ class ArticleEdit(PermissionRequiredMixin, UpdateView): ...@@ -102,6 +102,8 @@ class ArticleEdit(PermissionRequiredMixin, UpdateView):
self.object.fetch_content() self.object.fetch_content()
elif 'fetch_image' in self.request.GET: elif 'fetch_image' in self.request.GET:
self.object.fetch_image() self.object.fetch_image()
elif 'fetch_metadata' in self.request.GET:
self.object.fetch_metadata()
context = self.get_context_data(object=self.object) context = self.get_context_data(object=self.object)
return self.render_to_response(context) return self.render_to_response(context)
......
...@@ -15,5 +15,5 @@ url ...@@ -15,5 +15,5 @@ url
coreapi coreapi
pygments pygments
markdown markdown
opengraph-py3 opengraph_py3
-e git+https://github.com/mariocesar/sorl-thumbnail.git#egg=sorl-thumbnail -e git+https://github.com/mariocesar/sorl-thumbnail.git#egg=sorl-thumbnail
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment