Skip to content
Extraits de code Groupes Projets
Valider 566f4a22 rédigé par Nicolas Joyard's avatar Nicolas Joyard
Parcourir les fichiers

First pass at position import

parent 4bf3a6e5
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -258,6 +258,10 @@ LOGGING = {
'handlers': ['console'],
'level': LOG_LEVEL,
},
'representatives_positions': {
'handlers': ['console'],
'level': LOG_LEVEL
},
'representatives_recommendations': {
'handlers': ['console'],
'level': LOG_LEVEL
......
# coding: utf-8
import csv
import django
from django.apps import apps
from django.db import transaction
import logging
import sys
import re
from representatives_positions.models import Position
from representatives.models import Representative
logger = logging.getLogger(__name__)
class PositionImporter:
def __init__(self):
self.rep_cache = {}
def get_rep(self, first_name, last_name):
key = '%s %s' % (first_name, last_name)
rep = self.rep_cache.get(key, None)
if rep is None:
try:
rep = Representative.objects.get(first_name=first_name,
last_name=last_name)
self.rep_cache[key] = rep
except Representative.DoesNotExist:
rep = None
return rep
def import_row(self, row):
if len(row['date']) == 0:
logger.warn('Cannot import dateless position for %s %s on URL %s' %
(row['first_name'], row['last_name'], row['url']))
return False
rep = self.get_rep(row['first_name'], row['last_name'])
if rep is None:
logger.warn('Could not find rep %s %s' % (row['first_name'],
row['last_name']))
return False
text = re.sub('(^<p>|</p>$)', '', row['content'])
if row['title'] is not None and len(row['title']) > 0:
text = '%s\n%s' % (row['title'], text)
try:
position = Position.objects.get(representative=rep,
link=row['url'])
except Position.DoesNotExist:
position = Position(
representative=rep,
link=row['url'],
datetime=row['date'],
text=text,
published=True
)
position.save()
logger.info('Created position for %s %s on URL %s' % (
row['first_name'], row['last_name'], row['url']))
return True
def main(stream=None):
"""
Imports positions from an old memopol instance.
Usage:
cat positions.csv | memopol_import_positions
The input CSV file should be generated by the following query:
SELECT CONCAT(o.content, '|', o.url, '|', o.title, '|', ro.date, '|',
r.first_name, '|', r.last_name)
FROM reps_opinion o
INNER JOIN reps_opinionrep ro ON ro.opinion_id = o.id
INNER JOIN reps_representative r ON r.id = ro.representative_id
WHERE o.institution='EU'
"""
if not apps.ready:
django.setup()
importer = PositionImporter()
rejected = []
imported = 0
reader = csv.DictReader(stream or sys.stdin, delimiter='|', fieldnames=[
'content',
'url',
'title',
'date',
'first_name',
'last_name'
], quoting=csv.QUOTE_NONE)
for row in reader:
if not importer.import_row(row):
rejected.append(row)
else:
imported = imported + 1
logger.info('%d rows imported, %d rows rejected', imported, len(rejected))
......@@ -44,7 +44,8 @@ setup(name='political-memory',
},
entry_points={
'console_scripts': [
'memopol_import_recommendations = representatives_recommendations.contrib.import_recommendations:main'
'memopol_import_positions = representatives_positions.contrib.import_positions:main',
'memopol_import_recommendations = representatives_recommendations.contrib.import_recommendations:main',
]
}
)
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter