From 566f4a22e26c33c897bcb326ff7aa3fad384adda Mon Sep 17 00:00:00 2001
From: Nicolas Joyard <joyard.nicolas@gmail.com>
Date: Mon, 30 May 2016 21:42:45 +0200
Subject: [PATCH] First pass at position import

---
 memopol/settings.py                           |   4 +
 representatives_positions/contrib/__init__.py |   0
 .../contrib/import_positions.py               | 109 ++++++++++++++++++
 setup.py                                      |   3 +-
 4 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 representatives_positions/contrib/__init__.py
 create mode 100644 representatives_positions/contrib/import_positions.py

diff --git a/memopol/settings.py b/memopol/settings.py
index 48def9fe..ffdd506a 100644
--- a/memopol/settings.py
+++ b/memopol/settings.py
@@ -258,6 +258,10 @@ LOGGING = {
             'handlers': ['console'],
             'level': LOG_LEVEL,
         },
+        'representatives_positions': {
+            'handlers': ['console'],
+            'level': LOG_LEVEL
+        },
         'representatives_recommendations': {
             'handlers': ['console'],
             'level': LOG_LEVEL
diff --git a/representatives_positions/contrib/__init__.py b/representatives_positions/contrib/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/representatives_positions/contrib/import_positions.py b/representatives_positions/contrib/import_positions.py
new file mode 100644
index 00000000..1718985e
--- /dev/null
+++ b/representatives_positions/contrib/import_positions.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+
+import csv
+import django
+from django.apps import apps
+from django.db import transaction
+import logging
+import sys
+import re
+
+from representatives_positions.models import Position
+from representatives.models import Representative
+
+logger = logging.getLogger(__name__)
+
+
+class PositionImporter:
+    """Create Position objects from row dicts, caching representatives."""
+
+    def __init__(self):
+        # Representatives already fetched, keyed by 'first_name last_name'.
+        self.rep_cache = {}
+
+    def get_rep(self, first_name, last_name):
+        """Return the one Representative with that exact name, or None."""
+        key = '%s %s' % (first_name, last_name)
+        rep = self.rep_cache.get(key)
+        if rep is None:
+            try:
+                rep = Representative.objects.get(first_name=first_name,
+                                                 last_name=last_name)
+            except (Representative.DoesNotExist,
+                    Representative.MultipleObjectsReturned):
+                # Unknown or ambiguous name: nothing safe to attach to.
+                return None
+            self.rep_cache[key] = rep
+        return rep
+
+    def import_row(self, row):
+        """Create the Position described by row; return False if rejected."""
+        # csv.DictReader fills the missing fields of short rows with None.
+        if not row['date']:
+            logger.warning(
+                'Cannot import dateless position for %s %s on URL %s',
+                row['first_name'], row['last_name'], row['url'])
+            return False
+
+        rep = self.get_rep(row['first_name'], row['last_name'])
+        if rep is None:
+            logger.warning('Could not find rep %s %s', row['first_name'],
+                           row['last_name'])
+            return False
+
+        text = re.sub(r'(^<p>|</p>$)', '', row['content'])
+        if row['title']:
+            text = '%s\n%s' % (row['title'], text)
+
+        # A position already stored for this (representative, link) is kept.
+        if not Position.objects.filter(representative=rep,
+                                       link=row['url']).exists():
+            Position.objects.create(representative=rep, link=row['url'],
+                                    datetime=row['date'], text=text,
+                                    published=True)
+            logger.info('Created position for %s %s on URL %s',
+                        row['first_name'], row['last_name'], row['url'])
+        return True
+
+
+def main(stream=None):
+    """
+    Import positions exported from an old memopol instance.
+
+    Rows are read from `stream`, or from standard input when none is given,
+    and the number of imported and rejected rows is logged at the end.
+
+    Usage:
+        cat positions.csv | memopol_import_positions
+
+    The input CSV file should be generated by the following query:
+        SELECT CONCAT(o.content, '|', o.url, '|', o.title, '|', ro.date, '|',
+            r.first_name, '|', r.last_name)
+        FROM reps_opinion o
+        INNER JOIN reps_opinionrep ro ON ro.opinion_id = o.id
+        INNER JOIN reps_representative r ON r.id = ro.representative_id
+        WHERE o.institution='EU'
+
+    """
+    # Standalone runs must initialise Django before the ORM is usable.
+    if not apps.ready:
+        django.setup()
+
+    columns = [
+        'content', 'url', 'title',
+        'date', 'first_name', 'last_name',
+    ]
+    rows = csv.DictReader(stream or sys.stdin, delimiter='|',
+                          fieldnames=columns, quoting=csv.QUOTE_NONE)
+
+    importer = PositionImporter()
+    imported = rejected = 0
+    # import_row logs why a row is rejected; only the totals are kept here.
+    for row in rows:
+        if importer.import_row(row):
+            imported += 1
+        else:
+            rejected += 1
+
+    logger.info('%d rows imported, %d rows rejected', imported, rejected)
+
diff --git a/setup.py b/setup.py
index 9c917c96..d8950c82 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,8 @@ setup(name='political-memory',
     },
     entry_points={
         'console_scripts': [
-            'memopol_import_recommendations = representatives_recommendations.contrib.import_recommendations:main'
+            'memopol_import_positions = representatives_positions.contrib.import_positions:main',
+            'memopol_import_recommendations = representatives_recommendations.contrib.import_recommendations:main',
         ]
     }
 )
-- 
GitLab