Commit 87c60f6d authored by James Pic

Merge pull request #19 from njoyard/francedata

Add import from FranceData
parents 1e3bf2c9 3c9db4dc
......@@ -14,6 +14,9 @@ script:
- django-admin migrate
- flake8 representatives_votes/ --exclude migrations --ignore E128
- py.test
- cat representatives_votes/contrib/francedata/tests/dossiers_input.json | francedata_import_dossiers
- cat representatives_votes/contrib/francedata/tests/scrutins_input.json | francedata_import_scrutins
- cat representatives_votes/contrib/francedata/tests/votes_input.json | francedata_import_votes
- cat representatives_votes/contrib/parltrack/tests/dossiers_fixture.json | parltrack_import_dossiers
- cat representatives_votes/contrib/parltrack/tests/votes_fixture.json | parltrack_import_votes
after_success:
......
# coding: utf-8
import sys
import ijson
import logging
import django
from django.apps import apps
from representatives_votes.models import Dossier
logger = logging.getLogger(__name__)
def find_dossier(data):
    '''
    Locate (or build) the Dossier described by a FranceData item.

    The dossier is searched by ``reference``, first with ``data['url_an']``
    and then with ``data['url_sen']`` (only the keys actually present are
    tried).  When nothing matches, a new unsaved Dossier is instantiated,
    preferring 'url_an' as its reference.  In every case, if 'url_an' is
    present and the dossier was not matched or created through it, the
    reference is switched to ``data['url_an']``.

    Nothing is saved here.  Returns a ``(dossier, changed)`` tuple where
    ``changed`` is True when the dossier was just created or its reference
    was rewritten.
    '''
    present_keys = [key for key in ('url_an', 'url_sen') if key in data]
    has_an_url = 'url_an' in data

    dossier = None
    reffield = None
    changed = False

    for key in present_keys:
        try:
            dossier = Dossier.objects.get(reference=data[key])
        except Dossier.DoesNotExist:
            continue
        # Remember which URL matched so the reference can be promoted below.
        reffield = key
        break

    if dossier is None:
        reffield = 'url_an' if has_an_url else 'url_sen'
        dossier = Dossier(reference=data[reffield])
        logger.debug('Created dossier %s' % data[reffield])
        changed = True

    if has_an_url and reffield != 'url_an':
        logger.debug('Changed dossier reference to %s' % data['url_an'])
        dossier.reference = data['url_an']
        changed = True

    return dossier, changed
def parse_dossier_data(data):
    '''
    Create or update the Dossier matching one FranceData dossier item.

    Reads 'chambre', 'titre' and 'url_an' / 'url_sen' from ``data``.  The
    dossier link is aligned with its reference; the title is only taken from
    items whose own chamber URL is the dossier reference; when both URLs are
    given, 'url_sen' is stored as the external link.  The dossier is saved
    only when something actually changed.
    '''
    dossier, dirty = find_dossier(data)

    # URL of this item in its own chamber ('AN' or anything else).
    own_key = 'url_an' if data['chambre'] == 'AN' else 'url_sen'
    own_url = data[own_key]

    if dossier.reference != dossier.link:
        logger.debug('Changed dossier link to %s' % dossier.reference)
        dossier.link = dossier.reference
        dirty = True

    new_title = data['titre']
    if dossier.reference == own_url and dossier.title != new_title:
        logger.debug('Changed dossier title to %s' % new_title)
        dossier.title = new_title
        dirty = True

    both_urls = 'url_an' in data and 'url_sen' in data
    if both_urls and dossier.ext_link != data['url_sen']:
        senate_url = data['url_sen']
        logger.debug('Changed dossier ext. link to %s' % senate_url)
        dossier.ext_link = senate_url
        dirty = True

    if not dirty:
        return

    logger.debug('Saved dossier %s' % dossier.reference)
    dossier.save()
def main(stream=None):
    '''
    Import dossiers from a JSON stream.

    ``stream`` is a file-like object; ``sys.stdin`` is used when it is not
    given.  Django is set up first if the app registry is not ready yet, then
    every object ijson yields for the 'item' prefix is handed to
    ``parse_dossier_data``.
    '''
    if not apps.ready:
        django.setup()

    source = stream or sys.stdin
    for item in ijson.items(source, 'item'):
        parse_dossier_data(item)
# coding: utf-8
from datetime import datetime
import ijson
import logging
from pytz import timezone as date_timezone
import sys
import django
from django.apps import apps
from django.utils.timezone import make_aware as date_make_aware
from representatives_votes.models import Dossier, Proposal
logger = logging.getLogger(__name__)
def _parse_date(date_str):
    """
    Turn a 'YYYY-MM-DD' string into a timezone-aware datetime.

    The parsed (midnight) datetime is made aware in the 'Europe/Paris'
    timezone.
    """
    naive = datetime.strptime(date_str, "%Y-%m-%d")
    paris = date_timezone('Europe/Paris')
    return date_make_aware(naive, paris)
def _get_unique_title(proposal_pk, candidate):
    """
    Return a proposal title not used by any *other* proposal.

    ``candidate`` is returned as is when no proposal carries it, or when the
    proposal carrying it has primary key ``proposal_pk``.  Otherwise
    ' (1)', ' (2)', ... suffixes are tried in turn until the title is free or
    belongs to ``proposal_pk``.
    """
    def taken_by_other(text):
        # True when a proposal with this title exists and is not ours.
        try:
            holder = Proposal.objects.get(title=text)
        except Proposal.DoesNotExist:
            return False
        return bool(holder) and holder.pk != proposal_pk

    if not taken_by_other(candidate):
        return candidate

    suffix = 1
    while True:
        title = '%s (%d)' % (candidate, suffix)
        if not taken_by_other(title):
            break
        suffix += 1

    logger.debug('Made unique title %s' % title)
    return title
class ScrutinImporter:
    """
    Imports FranceData 'scrutin' items as Proposal rows.

    Dossier primary keys are looked up through two dictionaries that are
    filled from the database on first use and then reused for every item.
    """

    # Dossier reference -> pk; None until first lookup.
    dossiers_ref = None
    # Dossier ext_link -> pk (empty ext_link excluded); None until first
    # lookup.
    dossiers_ext = None

    def _load_dossier_caches(self):
        """Fill whichever dossier lookup table has not been built yet."""
        if self.dossiers_ref is None:
            self.dossiers_ref = dict(
                Dossier.objects.values_list('reference', 'pk')
            )

        if self.dossiers_ext is None:
            with_ext_link = Dossier.objects.exclude(ext_link='')
            self.dossiers_ext = dict(
                with_ext_link.values_list('ext_link', 'pk')
            )

    def get_dossier(self, url):
        """
        Return the pk of the dossier whose reference, or failing that whose
        ext_link, equals ``url``; None when neither matches.
        """
        self._load_dossier_caches()

        if url in self.dossiers_ref:
            return self.dossiers_ref[url]
        return self.dossiers_ext.get(url)

    def parse_scrutin_data(self, data):
        """
        Create or update the Proposal whose reference is ``data['url']``.

        Items without 'dossier_url', or pointing to a dossier that is not in
        the lookup tables, are skipped with a debug message.  The proposal is
        saved only when it is new or one of its fields changed.
        """
        ref = data['url']

        if 'dossier_url' not in data:
            logger.debug('Cannot create proposal without dossier')
            return

        dossier_url = data['dossier_url']
        dossier_pk = self.get_dossier(dossier_url)
        if dossier_pk is None:
            logger.debug('Cannot create proposal for unknown dossier %s'
                         % dossier_url)
            return

        try:
            proposal = Proposal.objects.get(reference=ref)
            dirty = False
        except Proposal.DoesNotExist:
            proposal = Proposal(reference=ref, total_for=0, total_against=0,
                                total_abstain=0)
            logger.debug('Created proposal %s' % ref)
            dirty = True

        wanted = dict(
            title=_get_unique_title(proposal.pk, data["objet"]),
            datetime=_parse_date(data["date"]),
            dossier_id=dossier_pk,
            kind='dossier'
        )

        for field, target in wanted.items():
            if target == getattr(proposal, field, None):
                continue
            logger.debug('Changed proposal %s to %s' % (field, target))
            setattr(proposal, field, target)
            dirty = True

        if not dirty:
            return

        logger.debug('Updated proposal %s' % ref)
        proposal.save()
def main(stream=None):
    """
    Import scrutins from a JSON stream.

    ``stream`` is a file-like object; ``sys.stdin`` is used when it is not
    given.  Django is set up first if the app registry is not ready yet, then
    a single ScrutinImporter processes every object ijson yields for the
    'item' prefix.
    """
    if not apps.ready:
        django.setup()

    importer = ScrutinImporter()
    source = stream or sys.stdin
    for item in ijson.items(source, 'item'):
        importer.parse_scrutin_data(item)
# coding: utf-8
import ijson
import logging
import sys
import django
from django.apps import apps
from django.utils.text import slugify
from representatives_votes.models import Proposal, Representative, Vote
logger = logging.getLogger(__name__)
class VotesImporter:
    """
    Imports FranceData vote items as Vote rows.

    Representatives and proposals are resolved through dictionaries that are
    filled from the database on first use.  The pk of every proposal that
    got a new or modified vote is recorded in ``touched`` so that
    ``update_totals`` can refresh its vote counters afterwards.
    """

    # Lazily built lookup tables; None until first use.
    deputes_slug = None  # slugified Representative.full_name -> pk
    deputes_rid = None   # Representative.remote_id -> pk
    scrutins = None      # Proposal.reference -> pk

    # Lower-cased FranceData 'division' value -> Vote position.
    positions = dict(
        pour="for",
        contre="against",
        abstention="abstain"
    )

    def __init__(self):
        # Must be per instance: a list defined on the class would be shared
        # (and keep growing) across every importer created in the process.
        self.touched = []

    def get_depute_by_name(self, prenom, nom):
        """
        Return the pk of the representative whose slugified full name
        matches '<prenom> <nom>', or None when there is none.
        """
        if self.deputes_slug is None:
            self.deputes_slug = {
                slugify(full_name): pk for full_name, pk in
                Representative.objects.values_list('full_name', 'pk')
            }

        # NOTE(review): the source literal rendered as a plain space, which
        # would make this replace a no-op; presumably it was a non-breaking
        # space, spelled out here as an explicit escape -- confirm.
        full = (u'%s %s' % (prenom, nom)).replace(u'\u00a0', u' ')
        return self.deputes_slug.get(slugify(full), None)

    def get_depute_by_url(self, url):
        """
        Return the pk of the representative whose remote_id equals ``url``,
        or None when there is none.
        """
        if self.deputes_rid is None:
            self.deputes_rid = dict(
                Representative.objects.values_list('remote_id', 'pk')
            )

        return self.deputes_rid.get(url, None)

    def get_scrutin(self, ref):
        """
        Return the pk of the proposal whose reference equals ``ref``, or
        None when there is none.
        """
        if self.scrutins is None:
            self.scrutins = dict(
                Proposal.objects.values_list('reference', 'pk')
            )

        return self.scrutins.get(ref, None)

    def parse_vote_data(self, data):
        """
        Create or update the vote described by one FranceData item.

        The proposal is resolved from 'scrutin_url'; the representative from
        'parl_url' when present, else from 'prenom' and 'nom'.  Items with an
        unknown proposal, unknown representative or a 'division' that is not
        in ``positions`` are skipped with a debug message.  The vote is saved
        (and its proposal recorded in ``touched``) only when it is new or its
        position changed.
        """
        scrutin = self.get_scrutin(data['scrutin_url'])
        if scrutin is None:
            logger.debug('Cannot import vote for unknown scrutin %s',
                         data['scrutin_url'])
            return

        if 'parl_url' in data:
            repdesc = data['parl_url']
            depute = self.get_depute_by_url(data['parl_url'])
        else:
            repdesc = '%s %s' % (data['prenom'], data['nom'])
            depute = self.get_depute_by_name(data['prenom'], data['nom'])

        if depute is None:
            logger.debug('Cannot import vote by unknown rep %s', repdesc)
            return

        division = data['division'].lower()
        if division not in self.positions:
            logger.debug('Cannot import vote for invalid position %s',
                         data['division'])
            return

        position = self.positions[division]

        changed = False
        try:
            vote = Vote.objects.get(representative_id=depute,
                                    proposal_id=scrutin)
        except Vote.DoesNotExist:
            vote = Vote(representative_id=depute, proposal_id=scrutin)
            logger.debug('Created vote for rep %s on %s', depute, scrutin)
            changed = True

        if vote.position != position:
            logger.debug('Changed vote position to %s', position)
            changed = True
            vote.position = position

        if changed:
            logger.debug('Updated vote for rep %s on %s', depute, scrutin)
            self.touched.append(scrutin)
            vote.save()

    def _unique_touched(self):
        """Return the touched proposal pks without duplicates, in order."""
        seen = set()
        unique = []
        for pk in self.touched:
            if pk not in seen:
                seen.add(pk)
                unique.append(pk)
        return unique

    def update_totals(self):
        """
        Recount the votes of every touched proposal.

        For each position the number of matching Vote rows is compared to the
        proposal's ``total_<position>`` attribute; the proposal is saved only
        when at least one counter differs.
        """
        # A proposal is appended to ``touched`` once per changed vote;
        # de-duplicate so each one is fetched and recounted a single time.
        proposals = [
            Proposal.objects.get(pk=pk) for pk in self._unique_touched()
        ]

        for proposal in proposals:
            changed = False

            for pos in self.positions.values():
                attr = 'total_%s' % pos
                count = Vote.objects.filter(proposal_id=proposal.pk,
                                            position=pos).count()

                if getattr(proposal, attr, None) != count:
                    logger.debug('Changed %s count for proposal %s to %s',
                                 pos, proposal.pk, count)
                    setattr(proposal, attr, count)
                    changed = True

            if changed:
                logger.debug('Updated proposal %s', proposal.pk)
                proposal.save()
def main(stream=None):
    """
    Import votes from a JSON stream, then refresh proposal totals.

    ``stream`` is a file-like object; ``sys.stdin`` is used when it is not
    given.  Django is set up first if the app registry is not ready yet.  A
    single VotesImporter processes every object ijson yields for the 'item'
    prefix, and ``update_totals`` runs once after the whole stream is read.
    """
    if not apps.ready:
        django.setup()

    importer = VotesImporter()
    source = stream or sys.stdin
    for item in ijson.items(source, 'item'):
        importer.parse_vote_data(item)

    importer.update_totals()
[
{
"fields": {
"updated": "2016-02-14T13:16:31.417Z",
"reference": "http://www.assemblee-nationale.fr/14/dossiers/liberte_maires_rythmes_scolaires_premier_degre.asp",
"title": "Education : libre choix des maires concernant les rythmes scolaires dans le premier degr\u00e9",
"text": "",
"created": "2016-02-14T13:16:31.417Z",
"link": "http://www.assemblee-nationale.fr/14/dossiers/liberte_maires_rythmes_scolaires_premier_degre.asp",
"ext_link": "",
"fingerprint": "5d1707e6663bb28d0308cdb36e9e91c5f235f8a1"
},
"model": "representatives_votes.dossier",
"pk": 1
},
{
"fields": {
"updated": "2016-02-14T13:16:31.428Z",
"reference": "http://www.assemblee-nationale.fr/14/dossiers/action_publique_territoriale_metropoles.asp",
"title": "Collectivit\u00e9s territoriales : action publique territoriale et m\u00e9tropoles",
"text": "",
"created": "2016-02-14T13:16:31.428Z",
"link": "http://www.assemblee-nationale.fr/14/dossiers/action_publique_territoriale_metropoles.asp",
"ext_link": "http://www.senat.fr/dossier-legislatif/pjl12-495.html",
"fingerprint": "c03f5e32f66e5f03ebe0a5d100f2f4ade941accc"
},
"model": "representatives_votes.dossier",
"pk": 2
},
{
"fields": {
"updated": "2016-02-21T14:34:35.721Z",
"reference": "http://www.senat.fr/dossier-legislatif/ppl13-799.html",
"title": "Protection de l'enfant",
"text": "",
"created": "2016-02-21T14:34:35.721Z",
"link": "http://www.senat.fr/dossier-legislatif/ppl13-799.html",
"ext_link": "",
"fingerprint": "5c58ebd00b9614e0965f173c8acb2bb77eb28f00"
},
"model": "representatives_votes.dossier",
"pk": 3
}
]
[
{
"chambre": "AN",
"url_an": "http://www.assemblee-nationale.fr/14/dossiers/liberte_maires_rythmes_scolaires_premier_degre.asp",
"titre": "Education : libre choix des maires concernant les rythmes scolaires dans le premier degr\u00e9"
},
{
"chambre": "SEN",
"url_sen": "http://www.senat.fr/dossier-legislatif/pjl12-495.html",
"titre": "Modernisation de l'action publique territoriale et affirmation des m\u00e9tropoles"
},
{
"chambre": "AN",
"url_an": "http://www.assemblee-nationale.fr/14/dossiers/action_publique_territoriale_metropoles.asp",
"url_sen": "http://www.senat.fr/dossier-legislatif/pjl12-495.html",
"titre": "Collectivit\u00e9s territoriales : action publique territoriale et m\u00e9tropoles"
},
{
"chambre": "SEN",
"url_sen": "http://www.senat.fr/dossier-legislatif/ppl13-799.html",
"titre": "Protection de l'enfant"
}
]
\ No newline at end of file
[
{
"fields": {
"updated": "2016-02-14T14:01:37.343Z",
"last_name": "",
"photo": "http://www.nosdeputes.fr/depute/photo/bernard-roman",
"created": "2016-02-14T14:01:37.343Z",
"gender": 2,
"remote_id": "2611",
"first_name": "",
"cv": "",
"active": true,
"birth_place": "Lille (Nord)",
"full_name": "Bernard Roman",
"fingerprint": "e28b45cced3c89ad3835fbdf261367ebea91b180",
"birth_date": "1952-07-15",
"slug": "bernard-roman"
},
"model": "representatives.representative",
"pk": 1
}
]
[
{
"fields": {
"updated": "2016-02-14T13:44:37.550Z",
"total_for": 0,
"description": "",
"reference": "http://www.assemblee-nationale.fr/scrutins/detail/(legislature)/14/(num)/740",
"title": "La motion de rejet pr\u00e9alable, pr\u00e9sent\u00e9e par m. le roux, de la proposition de loi permettant le libre choix des maires concernant les rythmes scolaires dans l'enseignement du premier degr\u00e9.",
"dossier": 1,
"created": "2016-02-14T13:44:37.550Z",
"kind": "dossier",
"datetime": "2013-12-04T23:00:00Z",
"total_against": 0,
"fingerprint": "40bb927c36b00bb688c1d7e7f4be5b9a1aae4af3",
"total_abstain": 0
},
"model": "representatives_votes.proposal",
"pk": 1
},
{
"fields": {
"updated": "2016-02-14T13:44:37.578Z",
"total_for": 0,
"description": "",
"reference": "http://www.assemblee-nationale.fr/scrutins/detail/(legislature)/14/(num)/740-2",
"title": "La motion de rejet pr\u00e9alable, pr\u00e9sent\u00e9e par m. le roux, de la proposition de loi permettant le libre choix des maires concernant les rythmes scolaires dans l'enseignement du premier degr\u00e9. (1)",
"dossier": 1,
"created": "2016-02-14T13:44:37.578Z",
"kind": "dossier",
"datetime": "2013-12-05T23:00:00Z",
"total_against": 0,
"fingerprint": "a8709fb12e8e6e4a5f46931d855bf70453dd7fd2",
"total_abstain": 0
},
"model": "representatives_votes.proposal",
"pk": 2
},
{
"fields": {
"updated": "2016-02-14T13:44:37.587Z",
"total_for": 0,
"description": "",
"reference": "http://www.assemblee-nationale.fr/scrutins/detail/(legislature)/14/(num)/748",
"title": "L'amendement n\u00b0 381 de m. dolez \u00e0 l'article 2 du projet de loi de modernisation de l'action publique territoriale et d'affirmation des m\u00e9tropoles.",
"dossier": 2,
"created": "2016-02-14T13:44:37.587Z",
"kind": "dossier",
"datetime": "2013-12-10T23:00:00Z",
"total_against": 0,
"fingerprint": "abf1dbdff878fa750f6ffb33fb362cb734e553e3",
"total_abstain": 0
},
"model": "representatives_votes.proposal",
"pk": 3
},
{
"fields": {
"updated": "2016-02-14T13:44:37.596Z",
"total_for": 0,
"description": "",
"reference": "http://www.assemblee-nationale.fr/scrutins/detail/(legislature)/14/(num)/747",
"title": "L'amendement n\u00b0 379 de m. dolez \u00e0 l'article 1er a du projet de loi de modernisation de l'action publique territoriale et d'affirmation des m\u00e9tropoles.",
"dossier": 2,
"created": "2016-02-14T13:44:37.596Z",
"kind": "dossier",
"datetime": "2013-12-10T23:00:00Z",
"total_against": 0,
"fingerprint": "3b75e49e1be0c8efc3706b9896cc1cb1f76dd9a7",
"total_abstain": 0
},
"model": "representatives_votes.proposal",
"pk": 4
}
]
[
{
"chambre": "AN",
"objet": "La motion de rejet pr\u00e9alable, pr\u00e9sent\u00e9e par m. le roux, de la proposition de loi permettant le libre choix des maires concernant les rythmes scolaires dans l'enseignement du premier degr\u00e9.",
"url": "http://www.assemblee-nationale.fr/scrutins/detail/(legislature)/14/(num)/740",
"numero": "740",
"dossier_url": "http://www.assemblee-nationale.fr/14/dossiers/liberte_maires_rythmes_scolaires_premier_degre.asp",
"date": "2013-12-05"
},
{
"chambre": "AN",
"objet": "La motion de rejet pr\u00e9alable, pr\u00e9sent\u00e9e par m. le roux, de la proposition de loi permettant le libre choix des maires concernant les rythmes scolaires dans l'enseignement du premier degr\u00e9.",
"url": "http://www.assemblee-nationale.fr/scrutins/detail/(legislature)/14/(num)/740-2",
"numero": "740-2",
"dossier_url": "http://www.assemblee-nationale.fr/14/dossiers/liberte_maires_rythmes_scolaires_premier_degre.asp",
"date": "2013-12-06"
},
{
"chambre": "AN",
"objet": "L'amendement n\u00b0 381 de m. dolez \u00e0 l'article 2 du projet de loi de modernisation de l'action publique territoriale et d'affirmation des m\u00e9tropoles.",
"url": "http://www.assemblee-nationale.fr/scrutins/detail/(legislature)/14/(num)/748",
"numero": "748",
"dossier_url": "http://www.assemblee-nationale.fr/14/dossiers/action_publique_territoriale_metropoles.asp",
"date": "2013-12-11"
},
{
"chambre": "AN",
"objet": "L'amendement n\u00b0 379 de m. dolez \u00e0 l'article 1er a du projet de loi de modernisation de l'action publique territoriale et d'affirmation des m\u00e9tropoles.",
"url": "http://www.assemblee-nationale.fr/scrutins/detail/(legislature)/14/(num)/747",
"numero": "747",
"dossier_url": "http://www.assemblee-nationale.fr/14/dossiers/action_publique_territoriale_metropoles.asp",
"date": "2013-12-11"
},
{
"chambre": "AN",
"objet": "Dossier inexistant.",
"url": "http://www.assemblee-nationale.fr/404",
"numero": "000",
"dossier_url": "http://www.assemblee-nationale.fr/14/dossiers/inexistant",
"date": "2099-12-11"
}
]
\ No newline at end of file
import copy
import os
import pytest
from django.core.serializers.json import Deserializer
from django.core.management import call_command
from representatives.models import Representative
from representatives_votes.contrib.francedata import import_dossiers
from representatives_votes.contrib.francedata import import_scrutins
from representatives_votes.contrib.francedata import import_votes
from representatives_votes.models import Dossier, Proposal, Vote
def _get_testdata(filename):
    """Return the path of ``filename`` inside this module's directory."""
    here = os.path.dirname(__file__)
    return os.path.join(here, filename)
def _test_import(fixtures, scenario, callback):
    """
    Run an importer against '<scenario>_input.json' and check the database
    against '<scenario>_expected.json'.

    All Representative, Dossier, Proposal and Vote rows are deleted first,
    then each entry of ``fixtures`` is loaded with the 'loaddata' command.
    ``callback`` receives the opened input file.  Every object serialized in
    the expected file must then be retrievable with ``objects.get`` using all
    of its attributes except the automatic ones; a missing row makes ``get``
    raise, which fails the test.
    """
    for model in (Representative, Dossier, Proposal, Vote):
        model.objects.all().delete()

    for fixture in fixtures:
        call_command('loaddata', fixture)

    input_path = _get_testdata('%s_input.json' % scenario)
    expected_path = _get_testdata('%s_expected.json' % scenario)

    # Disable django auto fields
    ignored = ('id', '_state', 'created', 'updated', 'fingerprint')

    with open(input_path, 'r') as stream:
        callback(stream)

    with open(expected_path, 'r') as stream:
        serialized = stream.read()

    for entry in Deserializer(serialized):
        instance = entry.object
        lookup = {
            name: value for name, value in instance.__dict__.items()
            if name not in ignored
        }
        type(instance).objects.get(**lookup)
@pytest.mark.django_db
def test_francedata_import_dossiers():
fixtures = []