Commit bd8f1bb6 authored by njoyard's avatar njoyard

Merge pull request #84 from political-memory/import-old-data

Add import scripts for old data, fixes #58
parents eeb3b220 24e97fe5
[run]
omit =
representatives_positions/contrib/*
representatives_recommendations/contrib/*
......@@ -258,6 +258,14 @@ LOGGING = {
'handlers': ['console'],
'level': LOG_LEVEL,
},
'representatives_positions': {
'handlers': ['console'],
'level': LOG_LEVEL
},
'representatives_recommendations': {
'handlers': ['console'],
'level': LOG_LEVEL
},
'representatives_votes': {
'handlers': ['console'],
'level': LOG_LEVEL,
......
[pytest]
DJANGO_SETTINGS_MODULE=memopol.settings
addopts = --cov=. --create-db
addopts = --cov-config .coveragerc --cov=. --create-db
# coding: utf-8
import csv
import django
from django.apps import apps
import logging
import sys
import re
from representatives_positions.models import Position
from representatives.models import Representative
logger = logging.getLogger(__name__)
class PositionImporter(object):
    """
    Imports position rows exported from an old memopol instance.

    Representatives are looked up by first and last name. Successful lookups
    are kept in ``rep_cache`` so each representative is only queried once.
    """

    def __init__(self):
        # Maps 'first_name last_name' to the matching Representative.
        self.rep_cache = {}

    def get_rep(self, first_name, last_name):
        """
        Returns the Representative with the given first and last name.

        Returns None when no representative matches, or when several do: a
        homonym cannot be resolved from the name alone, and letting
        MultipleObjectsReturned propagate would abort the whole import.
        """
        key = '%s %s' % (first_name, last_name)
        rep = self.rep_cache.get(key, None)
        if rep is not None:
            return rep

        try:
            rep = Representative.objects.get(first_name=first_name,
                                             last_name=last_name)
        except Representative.DoesNotExist:
            return None
        except Representative.MultipleObjectsReturned:
            logger.warning('Several reps are named %s %s',
                           first_name, last_name)
            return None

        self.rep_cache[key] = rep
        return rep

    def import_row(self, row):
        """
        Imports one CSV row, given as a dict with the keys 'content', 'url',
        'title', 'date', 'first_name' and 'last_name'.

        Returns False when the row is rejected (no date, or no unambiguous
        representative), True otherwise. A row whose representative already
        has a position for the same URL is left untouched and still counts
        as imported.
        """
        # csv.DictReader fills the missing fields of a short row with None,
        # so test for truthiness instead of calling len().
        if not row['date']:
            logger.warning(
                'Cannot import dateless position for %s %s on URL %s',
                row['first_name'], row['last_name'], row['url'])
            return False

        rep = self.get_rep(row['first_name'], row['last_name'])
        if rep is None:
            logger.warning('Could not find rep %s %s',
                           row['first_name'], row['last_name'])
            return False

        # Drop the paragraph tag wrapping the old content, if any.
        text = re.sub(r'(^<p>|</p>$)', '', row['content'])
        if row['title']:
            text = '%s\n%s' % (row['title'], text)

        # exists() does not raise when duplicates are already present,
        # unlike get().
        already_there = Position.objects.filter(representative=rep,
                                                link=row['url']).exists()
        if not already_there:
            position = Position(
                representative=rep,
                link=row['url'],
                datetime=row['date'],
                text=text,
                published=True
            )
            position.save()
            logger.info('Created position for %s %s on URL %s',
                        row['first_name'], row['last_name'], row['url'])

        return True
def main(stream=None):
    """
    Entry point: loads positions exported from an old memopol instance.

    Rows are read from ``stream``, or from standard input when no stream is
    given.

    Usage:
        cat positions.csv | memopol_import_positions

    The input must be the pipe-separated output of the following query:

    SELECT CONCAT(o.content, '|', o.url, '|', o.title, '|', ro.date, '|',
                  r.first_name, '|', r.last_name)
    FROM reps_opinion o
    INNER JOIN reps_opinionrep ro ON ro.opinion_id = o.id
    INNER JOIN reps_representative r ON r.id = ro.representative_id
    WHERE o.institution='EU'
    """
    # Allow running outside of an already configured Django process.
    if not apps.ready:
        django.setup()

    columns = ['content', 'url', 'title', 'date', 'first_name', 'last_name']
    source = stream or sys.stdin
    rows = csv.DictReader(source, delimiter='|', fieldnames=columns,
                          quoting=csv.QUOTE_NONE)

    importer = PositionImporter()
    rejected = []
    imported = 0

    for row in rows:
        if importer.import_row(row):
            imported += 1
        else:
            rejected.append(row)

    logger.info('%d rows imported, %d rows rejected', imported, len(rejected))
......@@ -2,10 +2,11 @@ from django.db import models
from django.core.urlresolvers import reverse
from django.template.defaultfilters import truncatewords
from taggit.managers import TaggableManager
from representatives.models import Representative
class Position(models.Model):
representative = models.ForeignKey('representatives.representative',
representative = models.ForeignKey(Representative,
related_name='positions')
datetime = models.DateField()
text = models.TextField()
......
# coding: utf-8
# flake8: noqa
# Maps proposal titles, as found in the data exported from the old memopol
# instance, to the reference of the corresponding dossier. The recommendation
# importer looks titles up here first and only falls back to matching on the
# dossier title when a title has no entry. The comment above each entry gives
# the name of the target dossier.
dossier_mappings = {
# Patent law: patentability of computer-implemented inventions
"Directive on patentability of \"computer-implemented inventions\" (software patents), 1st reading":
"2002/0047(COD)",
# Criminal measures aimed at ensuring the enforcement of intellectual property rights
"Criminal measures aimed at ensuring the enforcement of intellectual property rights (IPRED 2), 1st reading":
"2005/0127(COD)",
# Electronic communications: common regulatory framework for networks and services, access, interconnection and authorisation ['Telecoms Package' (amend. Directives 2002/19/EC, 2002/20/EC and 2002/21/EC)]
"Directives reforming the EU's regulatory framework for electronic communications networks and services (telecoms package), 1st reading":
"2007/0247(COD)",
"Directives reforming the EU's regulatory framework for electronic communications networks and services (telecoms package), 2nd reading":
"2007/0247(COD)",
# Cultural industries in Europe
"Rapport Bono on cultural industries in Europe":
"2007/2153(INI)",
# Strengthening security and fundamental freedoms on the Internet
"Rapport Lambrinidis on strengthening security and fundamental freedoms on the Internet":
"2008/2160(INI)",
# Enforcement of intellectual property rights in the internal market
"Rapport Gallo on enforcement of intellectual property rights in the internal market":
"2009/2178(INI)",
# Resolution on the Anti-Counterfeiting Trade Agreement (ACTA)
"Resolution on Anti-Counterfeiting Trade Agreement (ACTA)":
"2010/2935(RSP)",
# Enhanced cooperation in the area of the creation of unitary patent protection: implementation
"A7-0001/2012":
"2011/0093(COD)",
# EU/Australia, Canada, Japan, Korea, Mexico, Morocco, New Zealand, Singapore, Switzerland and United States Anti-Counterfeiting Trade Agreement (ACTA)
"A7-0204/2012":
"2011/0167(NLE)",
}
# Proposal kinds that all denote a resolution. When the kind of an imported
# recommendation is one of these, the recommendation importer also tries
# every other entry, in this order, to find the matching proposal. Entries
# are lowercase because they are compared with the lowercased kind.
resolutions = [
u'résolution législative',
u'résolution',
'legislative resolution',
'resolution'
]
# coding: utf-8
import csv
import django
from django.apps import apps
import logging
import sys
from representatives_recommendations.models import Recommendation
from representatives_votes.models import Dossier, Proposal
from .import_data import dossier_mappings, resolutions
logger = logging.getLogger(__name__)
class RecommendationImporter(object):
    """
    Imports recommendation rows exported from an old memopol instance.

    Each row is attached to a Proposal, found through its Dossier. Dossiers
    that were found are kept in ``dossier_cache``, keyed by the title used
    to look them up.
    """

    def __init__(self):
        # Maps an old proposal title to the matching Dossier.
        self.dossier_cache = {}

    @staticmethod
    def _to_text(value):
        """
        Decodes UTF-8 byte strings to text; other values pass through.

        On Python 2 the csv module yields byte strings. Comparing those with
        the unicode entries of ``resolutions`` never matches for non-ASCII
        kinds, and mixing them with unicode values in a format string can
        raise UnicodeDecodeError.
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def get_dossier(self, title):
        """
        Returns the Dossier for an old proposal title.

        The title is first looked up in ``dossier_mappings`` to get a
        dossier reference; otherwise the dossier title itself is matched
        case-insensitively. Returns None when no dossier matches, or when
        several do.
        """
        title = self._to_text(title)
        dossier = self.dossier_cache.get(title, None)
        if dossier is not None:
            return dossier

        ref = dossier_mappings.get(title, None)
        if ref is not None:
            query = {'reference': ref}
        else:
            query = {'title__iexact': title}

        try:
            dossier = Dossier.objects.get(**query)
        except Dossier.DoesNotExist:
            return None
        except Dossier.MultipleObjectsReturned:
            # Ambiguous match: reject the row rather than abort the import.
            logger.warning('Several dossiers match "%s"', title)
            return None

        self.dossier_cache[title] = dossier
        return dossier

    def get_proposal(self, dossier, kind):
        """
        Returns the Proposal of the given kind in ``dossier``, or None.

        The kind is matched case-insensitively. When it is one of the
        ``resolutions`` kinds, all of them are tried in turn after the
        requested one. An ambiguous match yields None.
        """
        kind = self._to_text(kind)
        kinds = [kind]
        if kind.lower() in resolutions:
            kinds.extend(resolutions)

        for k in kinds:
            try:
                return Proposal.objects.get(dossier=dossier, kind__iexact=k)
            except Proposal.DoesNotExist:
                continue
            except Proposal.MultipleObjectsReturned:
                # Picking one at random could attach the recommendation to
                # the wrong proposal.
                logger.warning('Several proposals "%s" for dossier %s',
                               k, dossier.reference)
                return None

        return None

    def import_row(self, row):
        """
        Imports one CSV row, given as a dict with the keys 'description',
        'weight', 'recommendation', 'part', 'title' and 'ponderation'.

        Returns False when the row is rejected (no dossier, no proposal, or
        a non-numeric weight), True otherwise. A proposal that already has a
        recommendation is left untouched and still counts as imported.
        """
        title = self._to_text(row['title'])
        part = self._to_text(row['part'])

        dossier = self.get_dossier(title)
        if dossier is None:
            logger.warning('No dossier "%s"', title)
            return False

        proposal = self.get_proposal(dossier, part)
        if proposal is None:
            logger.warning('No proposal "%s" for dossier %s (%d): "%s"',
                           part, dossier.reference, dossier.pk, title)
            return False

        try:
            weight = int(row['weight']) * int(row['ponderation'])
        except (TypeError, ValueError):
            # Missing or non-numeric value: reject this row only.
            logger.warning('Invalid weight "%s" or ponderation "%s" for "%s"',
                           row['weight'], row['ponderation'], title)
            return False

        descr = self._to_text(row['description']).strip()
        if not descr:
            descr = '%s on %s' % (part, dossier.reference)

        # exists() does not raise when duplicates are already present,
        # unlike get().
        if not Recommendation.objects.filter(proposal=proposal).exists():
            recom = Recommendation(
                proposal=proposal,
                recommendation=row['recommendation'],
                title=descr,
                weight=weight
            )
            recom.save()
            logger.info('Created recommendation with weight %s for %s: %s',
                        weight, title, part)

        return True
def main(stream=None):
    """
    Entry point: loads recommendations exported from an old memopol
    instance.

    Rows are read from ``stream``, or from standard input when no stream is
    given.

    Usage:
        cat recommendations.csv | memopol_import_recommendations

    The input must be the pipe-separated output of the following query:

    SELECT CONCAT(r.description, '|', r.weight, '|', r.recommendation, '|',
                  r.part, '|', p.title, '|', p.ponderation)
    FROM votes_recommendation r
    LEFT JOIN votes_proposal p ON r.proposal_id = p.id
    WHERE p.institution = 'EU'
    """
    # Allow running outside of an already configured Django process.
    if not apps.ready:
        django.setup()

    columns = ['description', 'weight', 'recommendation', 'part', 'title',
               'ponderation']
    source = stream or sys.stdin
    rows = csv.DictReader(source, delimiter='|', fieldnames=columns,
                          quoting=csv.QUOTE_NONE)

    importer = RecommendationImporter()
    rejected = []
    imported = 0

    for row in rows:
        if importer.import_row(row):
            imported += 1
        else:
            rejected.append(row)

    logger.info('%d rows imported, %d rows rejected', imported, len(rejected))
......@@ -41,5 +41,11 @@ setup(name='political-memory',
'pytest-cov==2.2.0',
'codecov',
]
},
entry_points={
'console_scripts': [
'memopol_import_positions = representatives_positions.contrib.import_positions:main', # noqa
'memopol_import_recommendations = representatives_recommendations.contrib.import_recommendations:main', # noqa
]
}
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment