Commit bf1a24df authored by Nicolas Joyard

Improve position import (dateless + wrong mep names)

parent bd8f1bb6
# coding: utf-8
# flake8: noqa
# Lookup table for positions that come without a date: each key is the
# position's source URL and each value is the date to use for it, written as
# an ISO "YYYY-MM-DD" string.
# NOTE(review): eight entries share "2010-01-01", presumably a placeholder for
# an unknown publication date -- confirm before relying on those dates.
position_dates = {
    "http://www.dailymotion.com/video/x8pi7a_marielle-de-sarnez-l-europe-et-les_news#from=embed&start=1": "2009-03-18",
    "http://tempsreel.nouvelobs.com/election-presidentielle-2012/20111215.OBS6873/interview-eva-joly-legalisons-le-partage-sur-internet.html": "2011-12-17",
    "http://patricklehyaricpe.wordpress.com/2010/03/10/le-parlement-eurpeen-a-battu-swift-il-doit-battre-acta/": "2010-03-10",
    "http://www.sanchezschmid.eu/uploads/PDF/lettre-MT2S-1Trim.pdf": "2010-01-01",
    "http://www.tokia-saifi.eu/index.php?option=com_content&view=article&id=201:halte-a-la-contrefacon-et-au-piratage&catid=65:commerce-international&Itemid=96&lang=fr": "2010-01-01",
    "http://www.tokia-saifi.eu/index.php?option=com_content&view=article&id=283%3Ale-piratage-en-ligne-reprime-&catid=34%3Aactualites&lang=fr": "2010-01-01",
    "http://www.tokia-saifi.eu/index.php?option=com_content&view=article&id=296%3Alacta-un-bouclier-protecteur-pour-lindustrie-europeenne-&catid=34%3Aactualites&lang=fr": "2010-01-01",
    "http://www.tokia-saifi.eu/index.php?option=com_content&view=article&id=230%3Aqlacta-un-accord-commercial-international-essentiel-pour-lutter-contre-la-contrefacon-a-lechelle-internationaleq-tokia-saifi-ump-ppe-f&catid=35%3Ainterventions-en-seance-pleniere&Itemid=59&lang=fr": "2010-01-01",
    "http://www.marietjeschaake.eu/12/05/2011/une-%C2%AB-diplomatie-culturelle-%C2%BB-pour-promouvoir-les-valeurs-europeennes-2/?lang=fr": "2011-05-12",
    "http://www.eurocitoyenne.fr/content/acta-un-rejet-de-cet-accord-par-le-parlement-europeen-est-possible-sil-y-une-mobilisation-co": "2010-01-01",
    "http://www.eurocitoyenne.fr/content/arretons-la-piraterie-en-public": "2010-01-01",
    "http://www.eurocitoyenne.fr/content/le-guide-juridique-la-protection-des-donnees-personnelles-sans-vie-privee-pas-de-liberte": "2010-01-01",
}
# Translation table from a representative's full name as written in the old
# memopol to the [first_name, last_name] pair used in the new memopol.
# It exists because the two sometimes disagree on accents, hyphenation, where
# the first/last split falls, or which name parts are present at all.
rep_names = {
    # NOTE(review): "Graf" appears in both parts of this pair -- confirm it
    # matches the record in the new database.
    u"Alexander Graf LAMBSDORFF": [u"Alexander Graf", u"Graf LAMBSDORFF"],
    u"Carlos José ITURGAIZ ANGULO": [u"Carlos", u"ITURGAIZ"],
    u"Cristian Silviu BUŞOI": [u"Cristian-Silviu", u"BUŞOI"],
    u"Eider GARDIAZÁBAL RUBIAL": [u"Eider", u"GARDIAZABAL RUBIAL"],
    u"Filiz Hakaeva HYUSMENOVA": [u"Filiz", u"HYUSMENOVA"],
    u"Glenis WILLMOTT": [u"Dame Glenis", u"WILLMOTT"],
    u"Iliana Malinova IOTOVA": [u"Iliana", u"IOTOVA"],
    u"Janusz Władysław ZEMKE": [u"Janusz", u"ZEMKE"],
    u"Marielle de SARNEZ": [u"Marielle", u"de SARNEZ"],
    u"Monica Luisa MACOVEI": [u"Monica", u"MACOVEI"],
    u"Róża Gräfin von THUN UND HOHENSTEIN": [u"Róża Gräfin", u"von THUN UND HOHENSTEIN"],
    u"Santiago FISAS AYXELA": [u"Santiago", u"FISAS AYXELÀ"],
    u"Sophia in 't VELD": [u"Sophia", u"in 't VELD"],
    u"Vasilica Viorica DĂNCILĂ": [u"Viorica", u"DĂNCILĂ"],
    u"Wim van de CAMP": [u"Wim", u"van de CAMP"],
}
......@@ -9,6 +9,7 @@ import re
from representatives_positions.models import Position
from representatives.models import Representative
from .import_data import position_dates, rep_names
logger = logging.getLogger(__name__)
......@@ -21,6 +22,7 @@ class PositionImporter:
key = '%s %s' % (first_name, last_name)
rep = self.rep_cache.get(key, None)
# Find rep
if rep is None:
try:
rep = Representative.objects.get(first_name=first_name,
......@@ -29,13 +31,31 @@ class PositionImporter:
except Representative.DoesNotExist:
rep = None
# Not found => try to use an alternate name
if rep is None:
newname = rep_names.get(key, None)
if newname is not None:
try:
rep = Representative.objects.get(first_name=newname[0],
last_name=newname[1])
self.rep_cache[key] = rep
except Representative.DoesNotExist:
rep = None
return rep
def import_row(self, row):
if len(row['date']) == 0:
logger.warn('Cannot import dateless position for %s %s on URL %s' %
(row['first_name'], row['last_name'], row['url']))
return False
if len(row['url']) == 0:
row['date'] = '2010-01-01'
row['url'] = '/'
else:
row['date'] = position_dates.get(row['url'], None)
if row['date'] is None:
logger.warn('Dateless position for %s %s on URL %s' %
(row['first_name'], row['last_name'], row['url']))
return False
rep = self.get_rep(row['first_name'], row['last_name'])
if rep is None:
......@@ -104,4 +124,5 @@ def main(stream=None):
else:
imported = imported + 1
logger.info('%d rows imported, %d rows rejected', imported, len(rejected))
logger.info('%d rows imported or already present, %d rows rejected',
imported, len(rejected))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment