Commit 060aac81 authored by okhin's avatar okhin 🚴

Merge branch '8-backend-parsing-contenu-metadonnees-images' into 'rp2'

Resolve "Backend : parsing contenu/métadonnées/images"

See merge request !21
parents a4f84f9b 9041d877
Pipeline #2539 passed with stages
in 2 minutes and 55 seconds
apps/rp/fixtures/all_articles.json filter=lfs diff=lfs merge=lfs -text
......@@ -50,21 +50,36 @@ Groups have to be initialized with the following command:
You can run migrations with :
```sh
$ python manage.py migrate
```
You can seed the database with the provided fixture,
located in apps/rp/fixtures/all_articles.json:
```sh
$ python manage.py loaddata all_articles.json
```
## Dev server
To launch the dev server :
```sh
$ python manage.py runserver
```
## Tests
To run the tests, install the dependencies from requirements-tests.txt and then run pytest:
```sh
$ pip install -r requirements-tests.txt
$ pytest
```
To measure test coverage, run:
```sh
$ pytest --cov=.
```
import re
import MySQLdb as ms
from MySQLdb.cursors import DictCursor
from django.core.management.base import BaseCommand
from rp.models import Article
class Command(BaseCommand):
    """Import the legacy press review into ``Article`` objects.

    Every row of the ``presse`` table of the legacy MySQL database ``site``
    is matched with its ``node`` and ``node_revisions`` rows, then turned
    into an ``Article`` through ``Article.add_new_url``.
    """

    help = """
Import data from the old press review. Should only be used by
LQDN staff since the dataformat of the old website only make
sense in their context.
It takes three arguments:
--host hostname
--sql_user username
--sql_password password
host defaults to 127.0.0.1.
"""

    def add_arguments(self, parser):
        """Declare the ``--host``, ``--sql_user`` and ``--sql_password`` options.

        The user and password options use ``nargs='+'``, so their values
        reach ``handle`` as lists; only the first element is used there.
        """
        parser.add_argument('--host',
                            default='127.0.0.1',
                            dest='host',
                            nargs='?')
        parser.add_argument('--sql_user',
                            dest='user',
                            nargs='+')
        parser.add_argument('--sql_password',
                            dest='password',
                            nargs='+')

    def handle(self, *args, **options):
        """Connect to the legacy database and import every ``presse`` row.

        Raises:
            CommandError: if ``--sql_user`` or ``--sql_password`` is missing.
        """
        # Imported locally so the module-level imports stay untouched.
        from django.core.management.base import CommandError

        # Fail with a readable message instead of a TypeError on ``None[0]``.
        credentials = {}
        for dest, flag in (('user', '--sql_user'),
                           ('password', '--sql_password')):
            values = options.get(dest)
            if not values:
                raise CommandError("{} is required".format(flag))
            credentials[dest] = values[0]

        db = ms.connect(host=options['host'],
                        user=credentials['user'],
                        password=credentials['password'],
                        db='site')
        try:
            cursor = db.cursor(DictCursor)

            # First, let's get the data from presse table. The value
            # returned by execute() is used as the total in the progress
            # output.
            total = cursor.execute("SELECT * FROM presse")
            self.stdout.write(
                "Importing 0/{} from previous database".format(total))

            # fetchall() materialises the rows up front, so the same cursor
            # can be reused for the per-row lookups below.
            for done, item in enumerate(cursor.fetchall(), start=1):
                self.stdout.write(
                    "Importing {}/{} from previous database".format(done,
                                                                    total))
                self._import_item(cursor, item)
        finally:
            # Always release the connection, even when an import step fails.
            db.close()

    def _import_item(self, cursor, item):
        """Create or update the ``Article`` matching one ``presse`` row.

        Rows without a matching ``node`` or ``node_revisions`` entry are
        skipped.
        """
        cursor.execute("SELECT nid, vid FROM node WHERE nid=%s",
                       (item['nid'],))
        node = cursor.fetchone()
        if node is None:
            return

        cursor.execute("SELECT body FROM node_revisions WHERE vid=%s",
                       (node['vid'],))
        revision = cursor.fetchone()
        if revision is None:
            return

        # Get the article for this URL (presumably an existing row is reused
        # when there is one; confirm against Article.add_new_url).
        article = Article.add_new_url(url=item['url'])
        if item['lang'] != "":
            article.lang = item['lang']
        article.published_at = item['date_publi']
        article.title = item['title']

        # Let's extract the website from the title.
        # NOTE(review): the pattern is greedy, so a title holding several
        # bracketed parts captures everything between the first '[' and the
        # last ']'.
        website = re.search(r'\[(.*)]', item['title'])
        if website:
            article.website = website.group(1)

        # Carry the legacy note over: positive notes become the up score,
        # negative notes become the down score (as an absolute value).
        if item['note'] > 0:
            article.und_score_up = item['note']
        if item['note'] < 0:
            article.und_score_down = abs(item['note'])
        article.save()
        article.refresh_from_db()

        # NOTE(review): the nesting of the status handling below was
        # reconstructed from a copy that had lost its indentation; confirm
        # against the repository.
        if item['published'] >= 1:
            # Let's get the extracts
            article.extracts = revision['body']
            try:
                article.fetch_content()
                article.fetch_image()
            except Exception as exc:
                # Best effort: one failing fetch must not abort the whole
                # import, but it should not go unnoticed either.
                self.stderr.write(
                    "Could not fetch content or image for {}: {}".format(
                        item['url'], exc))

            if article.status not in ("DRAFT", "PUBLISHED", ):
                article.recover()

        if item['published'] >= 2:
            if article.status != "PUBLISHED":
                article.publish()
        article.save()
......@@ -33,5 +33,5 @@ class ArticleSerializer(serializers.ModelSerializer):
class Meta:
model = Article
fields = ('id', 'url', 'title', 'tags', 'extracts',
fields = ('id', 'url', 'title', 'tags', 'extracts', 'status',
'und_score_up', 'und_score_down')
......@@ -5,7 +5,7 @@ from rp.models import Article
class ArticlesFeed(Feed):
title = "Revue de presse de la Quadrature"
link = "/feed/"
link = "/feeds/"
description = "La revue de presse recense les articles de presse relatifs aux sujets de la Quadrature. Elle est compilée chaque jour par ses bénévoles, à partir de la presse francophone et internationale. Bonne lecture !"
def __init__(self, **kwargs):
......@@ -18,6 +18,9 @@ class ArticlesFeed(Feed):
def item_title(self, item):
return item.title
def item_status(self, item):
return item.status
def item_description(self, item):
return item.extracts
......
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
......@@ -6,6 +6,7 @@ from taggit.managers import TaggableManager
from newspaper import Article as ArticleParser
from django_und.models import VoteMixin
from django_fsm import FSMField, transition, RETURN_VALUE
import opengraph_py3 as og
from io import BytesIO
from datetime import datetime
......@@ -183,6 +184,8 @@ class Article(VoteMixin):
if by is not None:
article.upvote(by)
# Let's get metadata import
article.metadata = og.OpenGraph(url=url)
article.save()
return article
......
......@@ -15,4 +15,5 @@ url
coreapi
pygments
markdown
opengraph-py3
-e git+https://github.com/mariocesar/sorl-thumbnail.git#egg=sorl-thumbnail
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment