import_positions.py 4 KB
Newer Older
Nicolas Joyard's avatar
Nicolas Joyard committed
1 2 3 4 5 6 7 8 9 10 11
# coding: utf-8

import csv
import django
from django.apps import apps
import logging
import sys
import re

from representatives_positions.models import Position
from representatives.models import Representative
12
from .import_data import position_dates, rep_names
Nicolas Joyard's avatar
Nicolas Joyard committed
13 14 15 16 17 18 19 20 21 22 23 24

logger = logging.getLogger(__name__)


class PositionImporter:
    """Import legacy position rows, attaching each one to a Representative.

    Representatives that were successfully resolved are memoised in
    ``rep_cache`` (keyed by ``'<first_name> <last_name>'`` exactly as given
    in the input), so a given name only hits the database until it has been
    found once.
    """

    def __init__(self):
        # Maps 'first_name last_name' (as found in the input) to the
        # Representative instance resolved for that name.
        self.rep_cache = {}

    def _find_rep(self, first_name, last_name):
        """Return the Representative with exactly this name, or None.

        Only ``Representative.DoesNotExist`` is handled here; any other
        error raised by the query propagates to the caller.
        """
        try:
            return Representative.objects.get(first_name=first_name,
                                              last_name=last_name)
        except Representative.DoesNotExist:
            return None

    def get_rep(self, first_name, last_name):
        """Resolve a representative from the name found in the input.

        The cache is consulted first, then the database with the name as
        given, and finally the database with the alternate name registered
        in ``rep_names`` for that input name (if any).

        Returns the Representative, or None when no match was found. Only
        successful lookups are cached.
        """
        key = '%s %s' % (first_name, last_name)

        rep = self.rep_cache.get(key)
        if rep is not None:
            return rep

        # Find rep using the name as given
        rep = self._find_rep(first_name, last_name)

        # Not found => try to use an alternate name
        if rep is None:
            newname = rep_names.get(key)
            if newname is not None:
                rep = self._find_rep(newname[0], newname[1])

        if rep is not None:
            self.rep_cache[key] = rep

        return rep

    def import_row(self, row):
        """Import one position row; return True on success, False otherwise.

        ``row`` is a dict with the keys 'content', 'url', 'title', 'date',
        'first_name' and 'last_name'. It is modified in place when the date
        (and possibly the URL) has to be filled in.

        Returns True when the position was created or when a position
        already exists for the same representative and URL. Returns False
        when no date could be determined, when the representative could not
        be found, or when any exception was raised (the exception is logged,
        not propagated).
        """
        try:
            # len() is used on purpose rather than truthiness: a missing
            # field (None) raises here and the row is rejected below.
            if len(row['date']) == 0:
                if len(row['url']) == 0:
                    # Neither date nor URL: fall back to fixed placeholders
                    row['date'] = '2010-01-01'
                    row['url'] = '/'
                else:
                    # No date but a URL: use the date registered for it
                    row['date'] = position_dates.get(row['url'])

                    if row['date'] is None:
                        logger.warning(
                            'Dateless position for %s %s on URL %s',
                            row['first_name'], row['last_name'], row['url'])
                        return False

            rep = self.get_rep(row['first_name'], row['last_name'])
            if rep is None:
                logger.warning('Could not find rep %s %s',
                               row['first_name'], row['last_name'])
                return False

            # Strip a leading <p> and a trailing </p> from the content
            text = re.sub(r'(^<p>|</p>$)', '', row['content'])
            if row['title']:
                text = '%s\n%s' % (row['title'], text)

            try:
                Position.objects.get(representative=rep, link=row['url'])
            except Position.DoesNotExist:
                position = Position(
                    representative=rep,
                    link=row['url'],
                    datetime=row['date'],
                    text=text,
                    published=True
                )
                position.save()
                logger.info('Created position for %s %s on URL %s',
                            row['first_name'], row['last_name'], row['url'])

            return True
        except Exception:
            # Best-effort import: log the failure with its traceback and
            # report the row as rejected instead of aborting the whole run.
            logger.exception('error trying to import position %s', str(row))
            return False
Nicolas Joyard's avatar
Nicolas Joyard committed
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130


def main(stream=None):
    """
    Imports positions from an old memopol instance.

    Usage:
        cat positions.csv | memopol_import_positions

    The input CSV file should be generated by the following query:
        SELECT CONCAT(o.content, '|', o.url, '|', o.title, '|', ro.date, '|',
            r.first_name, '|', r.last_name)
        FROM reps_opinion o
        INNER JOIN reps_opinionrep ro ON ro.opinion_id = o.id
        INNER JOIN reps_representative r ON r.id = ro.representative_id
        WHERE o.institution='EU'

    Args:
        stream: optional iterable of '|'-separated lines (e.g. an open file).
            ``sys.stdin`` is read only when ``stream`` is None, so an empty
            iterable is processed as empty input instead of falling back to
            stdin.

    The number of imported and rejected rows is logged at INFO level once
    all rows have been processed.
    """

    if not apps.ready:
        django.setup()

    importer = PositionImporter()
    imported = 0
    rejected = 0

    source = sys.stdin if stream is None else stream

    # The input has no header line: columns are named here, in query order.
    reader = csv.DictReader(source, delimiter='|', fieldnames=[
        'content',
        'url',
        'title',
        'date',
        'first_name',
        'last_name'
    ], quoting=csv.QUOTE_NONE)

    for row in reader:
        if importer.import_row(row):
            imported += 1
        else:
            rejected += 1

    logger.info('%d rows imported or already present, %d rows rejected',
                imported, rejected)