From 566f4a22e26c33c897bcb326ff7aa3fad384adda Mon Sep 17 00:00:00 2001
From: Nicolas Joyard <joyard.nicolas@gmail.com>
Date: Mon, 30 May 2016 21:42:45 +0200
Subject: [PATCH] First pass at position import

---
Review notes (ignored by git am):
- This patch was rebuilt from a whitespace-collapsed copy; the indentation
  of unchanged context lines in memopol/settings.py and setup.py is inferred
  and should be checked against the target tree before applying.
- import_positions.py was revised during review (None-safe date check,
  logger.warning, handling of homonym reps and duplicate positions, cached
  lookup misses, docstrings), so its index hash below is stale.

 memopol/settings.py                           |   4 +
 representatives_positions/contrib/__init__.py |   0
 .../contrib/import_positions.py               | 148 ++++++++++++++++++
 setup.py                                      |   3 +-
 4 files changed, 154 insertions(+), 1 deletion(-)
 create mode 100644 representatives_positions/contrib/__init__.py
 create mode 100644 representatives_positions/contrib/import_positions.py

diff --git a/memopol/settings.py b/memopol/settings.py
index 48def9fe..ffdd506a 100644
--- a/memopol/settings.py
+++ b/memopol/settings.py
@@ -258,6 +258,10 @@ LOGGING = {
             'handlers': ['console'],
             'level': LOG_LEVEL,
         },
+        'representatives_positions': {
+            'handlers': ['console'],
+            'level': LOG_LEVEL
+        },
         'representatives_recommendations': {
             'handlers': ['console'],
             'level': LOG_LEVEL
diff --git a/representatives_positions/contrib/__init__.py b/representatives_positions/contrib/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/representatives_positions/contrib/import_positions.py b/representatives_positions/contrib/import_positions.py
new file mode 100644
index 00000000..1718985e
--- /dev/null
+++ b/representatives_positions/contrib/import_positions.py
@@ -0,0 +1,148 @@
+# coding: utf-8
+
+import csv
+import django
+from django.apps import apps
+from django.db import transaction
+import logging
+import sys
+import re
+
+from representatives_positions.models import Position
+from representatives.models import Representative
+
+logger = logging.getLogger(__name__)
+
+
+class PositionImporter:
+    """
+    Imports CSV rows as published Position objects.
+
+    Representative lookups are cached on the importer, so each first
+    name/last name pair is queried at most once per instance.
+    """
+
+    def __init__(self):
+        # Maps 'first_name last_name' to a Representative, or to None when
+        # no single representative matches these names.
+        self.rep_cache = {}
+
+    def get_rep(self, first_name, last_name):
+        """
+        Returns the Representative with the given names, or None when
+        none or more than one representative matches.
+        """
+        key = '%s %s' % (first_name, last_name)
+
+        if key not in self.rep_cache:
+            try:
+                rep = Representative.objects.get(first_name=first_name,
+                                                 last_name=last_name)
+            except Representative.DoesNotExist:
+                rep = None
+            except Representative.MultipleObjectsReturned:
+                # Homonyms cannot be told apart from the CSV data alone:
+                # reject their rows instead of aborting the whole import.
+                logger.warning('Found several reps named %s %s',
+                               first_name, last_name)
+                rep = None
+
+            # Misses are cached too, so unknown names are queried once.
+            self.rep_cache[key] = rep
+
+        return self.rep_cache[key]
+
+    def import_row(self, row):
+        """
+        Imports one CSV row as a published Position.
+
+        The row is a dict with 'content', 'url', 'title', 'date',
+        'first_name' and 'last_name' keys. Nothing is created when a
+        position already exists for the same representative and URL.
+
+        Returns False when the row is rejected (empty date, or no unique
+        matching representative), True otherwise.
+        """
+        # csv.DictReader sets the fields missing from a short row to None,
+        # so test for truthiness instead of calling len() on the value.
+        if not row['date']:
+            logger.warning(
+                'Cannot import dateless position for %s %s on URL %s',
+                row['first_name'], row['last_name'], row['url'])
+            return False
+
+        rep = self.get_rep(row['first_name'], row['last_name'])
+        if rep is None:
+            logger.warning('Could not find rep %s %s', row['first_name'],
+                           row['last_name'])
+            return False
+
+        # Drop the <p>...</p> tags wrapping the content, when present.
+        text = re.sub(r'(^<p>|</p>$)', '', row['content'])
+        if row['title']:
+            text = '%s\n%s' % (row['title'], text)
+
+        # exists() copes with duplicates already in the database, where
+        # get() would raise MultipleObjectsReturned.
+        existing = Position.objects.filter(representative=rep,
+                                           link=row['url'])
+        if not existing.exists():
+            position = Position(
+                representative=rep,
+                link=row['url'],
+                datetime=row['date'],
+                text=text,
+                published=True
+            )
+            position.save()
+            logger.info('Created position for %s %s on URL %s',
+                        row['first_name'], row['last_name'], row['url'])
+
+        return True
+
+
+def main(stream=None):
+    """
+    Imports positions from an old memopol instance.
+
+    Rows are read from the given stream, or from standard input when no
+    stream is given. The number of imported and rejected rows is logged
+    once all rows have been processed.
+
+    Usage:
+        cat positions.csv | memopol_import_positions
+
+    The input CSV file should be generated by the following query:
+        SELECT CONCAT(o.content, '|', o.url, '|', o.title, '|', ro.date, '|',
+            r.first_name, '|', r.last_name)
+        FROM reps_opinion o
+        INNER JOIN reps_opinionrep ro ON ro.opinion_id = o.id
+        INNER JOIN reps_representative r ON r.id = ro.representative_id
+        WHERE o.institution='EU'
+
+    Quoting is disabled, so field values must not contain the '|' delimiter.
+    """
+
+    if not apps.ready:
+        django.setup()
+
+    importer = PositionImporter()
+    rejected = []
+    imported = 0
+
+    reader = csv.DictReader(stream or sys.stdin, delimiter='|', fieldnames=[
+        'content',
+        'url',
+        'title',
+        'date',
+        'first_name',
+        'last_name'
+    ], quoting=csv.QUOTE_NONE)
+
+    for row in reader:
+        if importer.import_row(row):
+            imported += 1
+        else:
+            rejected.append(row)
+
+    logger.info('%d rows imported, %d rows rejected', imported, len(rejected))
diff --git a/setup.py b/setup.py
index 9c917c96..d8950c82 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,8 @@ setup(name='political-memory',
     },
     entry_points={
         'console_scripts': [
-            'memopol_import_recommendations = representatives_recommendations.contrib.import_recommendations:main'
+            'memopol_import_positions = representatives_positions.contrib.import_positions:main',
+            'memopol_import_recommendations = representatives_recommendations.contrib.import_recommendations:main',
         ]
     }
 )
-- 
GitLab