Commit 566f4a22 authored by Nicolas Joyard

First pass at position import

parent 4bf3a6e5
......@@ -258,6 +258,10 @@ LOGGING = {
'handlers': ['console'],
'level': LOG_LEVEL,
},
'representatives_positions': {
'handlers': ['console'],
'level': LOG_LEVEL
},
'representatives_recommendations': {
'handlers': ['console'],
'level': LOG_LEVEL
......
# coding: utf-8
import csv
import django
from django.apps import apps
from django.db import transaction
import logging
import sys
import re
from representatives_positions.models import Position
from representatives.models import Representative
logger = logging.getLogger(__name__)
class PositionImporter(object):
    """Create Position objects from rows of a legacy positions export.

    Each row is a mapping with the keys ``content``, ``url``, ``title``,
    ``date``, ``first_name`` and ``last_name``. Representative lookups are
    memoised per importer instance so that a representative appearing on
    many rows is only fetched once.
    """

    def __init__(self):
        # Maps (first_name, last_name) to the matching Representative, or to
        # None when the lookup already failed. A tuple key avoids the
        # ambiguity a space-joined string has with multi-word names.
        self.rep_cache = {}

    def get_rep(self, first_name, last_name):
        """Return the Representative with exactly this name, or None.

        None is returned both when no representative matches and when
        several do (the name alone cannot tell them apart). The outcome,
        including a failure, is cached for subsequent calls.
        """
        key = (first_name, last_name)
        if key in self.rep_cache:
            return self.rep_cache[key]

        try:
            rep = Representative.objects.get(first_name=first_name,
                                             last_name=last_name)
        except Representative.DoesNotExist:
            rep = None
        except Representative.MultipleObjectsReturned:
            # Do not abort the whole import because of one ambiguous name.
            logger.warning('Several reps are named %s %s, cannot pick one',
                           first_name, last_name)
            rep = None

        self.rep_cache[key] = rep
        return rep

    def import_row(self, row):
        """Import one row; return True if accepted, False if rejected.

        A row is rejected when it has no date or when its representative
        cannot be resolved. A row whose (representative, URL) pair already
        has a Position is accepted without creating a duplicate.
        """
        # csv.DictReader fills missing trailing fields with None, so test
        # truthiness rather than calling len() on the value.
        if not row['date']:
            logger.warning(
                'Cannot import dateless position for %s %s on URL %s',
                row['first_name'], row['last_name'], row['url'])
            return False

        rep = self.get_rep(row['first_name'], row['last_name'])
        if rep is None:
            logger.warning('Could not find rep %s %s',
                           row['first_name'], row['last_name'])
            return False

        # Strip the paragraph tag wrapping the whole content, if any.
        text = re.sub(r'(^<p>|</p>$)', '', row['content'] or '')
        if row['title']:
            text = '%s\n%s' % (row['title'], text)

        # exists() instead of get(): pre-existing duplicates must not raise
        # MultipleObjectsReturned and stop the import.
        already_imported = Position.objects.filter(
            representative=rep, link=row['url']).exists()
        if not already_imported:
            position = Position(
                representative=rep,
                link=row['url'],
                datetime=row['date'],
                text=text,
                published=True
            )
            position.save()
            logger.info('Created position for %s %s on URL %s',
                        row['first_name'], row['last_name'], row['url'])

        return True
def main(stream=None):
    """
    Import positions exported from an old memopol instance.

    Rows are read from ``stream`` when given, otherwise from standard input.

    Usage:
        cat positions.csv | memopol_import_positions

    The input is pipe-separated, unquoted, and has no header line. It should
    be produced by the following query:

        SELECT CONCAT(o.content, '|', o.url, '|', o.title, '|', ro.date, '|',
                      r.first_name, '|', r.last_name)
        FROM reps_opinion o
        INNER JOIN reps_opinionrep ro ON ro.opinion_id = o.id
        INNER JOIN reps_representative r ON r.id = ro.representative_id
        WHERE o.institution='EU'
    """
    # NOTE(review): the model imports at the top of this module execute
    # before this call; confirm the app registry is usable at import time.
    if not apps.ready:
        django.setup()

    columns = [
        'content',
        'url',
        'title',
        'date',
        'first_name',
        'last_name'
    ]
    source = stream or sys.stdin
    rows = csv.DictReader(source, fieldnames=columns, delimiter='|',
                          quoting=csv.QUOTE_NONE)

    importer = PositionImporter()
    accepted = 0
    refused = []
    for row in rows:
        if importer.import_row(row):
            accepted += 1
        else:
            refused.append(row)

    logger.info('%d rows imported, %d rows rejected', accepted, len(refused))
......@@ -44,7 +44,8 @@ setup(name='political-memory',
},
entry_points={
'console_scripts': [
'memopol_import_recommendations = representatives_recommendations.contrib.import_recommendations:main'
'memopol_import_positions = representatives_positions.contrib.import_positions:main',
'memopol_import_recommendations = representatives_recommendations.contrib.import_recommendations:main',
]
}
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment