Commit 094808d7 authored by njoyard's avatar njoyard Committed by GitHub

Merge pull request #29 from political-memory/add-document

Add Document model
parents 2c7a1dce 3944388c
......@@ -2,14 +2,22 @@
from django.contrib import admin
from .models import Dossier, Proposal, Vote
from .models import Dossier, Document, Proposal, Vote
class DossierAdmin(admin.ModelAdmin):
list_display = ('id', 'reference', 'title', 'link')
list_display = ('id', 'reference', 'title')
search_fields = ('reference', 'title')
class DocumentAdmin(admin.ModelAdmin):
    """Admin configuration for documents attached to a dossier."""

    list_display = ('dossier_reference', 'kind', 'title', 'link')
    # Every search field must resolve to a real lookup on the model. The
    # reference shown in the list comes from the related dossier (see
    # dossier_reference below), so it is searched through the relation
    # rather than through a bare 'reference' lookup on the document.
    search_fields = ('dossier__reference', 'title')

    def dossier_reference(self, obj):
        """Return the reference of the dossier that ``obj`` belongs to."""
        return obj.dossier.reference
class ProposalAdmin(admin.ModelAdmin):
list_display = (
'reference',
......@@ -49,5 +57,6 @@ class VoteAdmin(admin.ModelAdmin):
return obj.proposal.reference
admin.site.register(Dossier, DossierAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Proposal, ProposalAdmin)
admin.site.register(Vote, VoteAdmin)
......@@ -51,7 +51,8 @@ class DossierViewSet(viewsets.ReadOnlyModelViewSet):
def retrieve(self, request, pk=None):
self.serializer_class = DossierDetailSerializer
self.queryset = self.queryset.prefetch_related('proposals')
self.queryset = self.queryset.prefetch_related('proposals',
'documents')
return super(DossierViewSet, self).retrieve(request, pk)
......
......@@ -3,27 +3,48 @@
import sys
import ijson
import logging
import re
import django
from django.apps import apps
from django.db import transaction
from representatives_votes.models import Dossier
from representatives.contrib.francedata.import_representatives import \
ensure_chambers
from representatives.models import Chamber
from representatives_votes.models import Document, Dossier
logger = logging.getLogger(__name__)
def extract_reference(url):
    """Derive a short dossier reference from a dossier page URL.

    The URL is tried against each known layout in order; the first layout
    that matches determines the reference:

    * ``.../dossier-legislatif/<name>.html`` gives ``<name>``
    * ``.../<digits>/dossiers/<name>.asp`` gives ``<digits>/<name>``
    * ``.../dossiers/<name>.asp`` gives ``<name>``

    Returns ``None`` when no layout matches.
    """
    # (search pattern, output template) pairs; order matters because the
    # last pattern would also match URLs handled by the second one.
    layouts = (
        (r'/dossier-legislatif/([^./]+)\.html', '%s'),
        (r'/(\d+)/dossiers/([^./]+)\.asp', '%s/%s'),
        (r'/dossiers/([^./]+)\.asp', '%s'),
    )

    for pattern, template in layouts:
        found = re.search(pattern, url)
        if found:
            return template % found.groups()

    return None
def find_dossier(data):
'''
Find dossier with reference matching either 'url_an' or 'url_sen',
create it if not found. Ensure its reference and source are 'url_an' if
both fields are present.
Find dossier with reference matching either 'ref_an' or 'ref_sen',
create it if not found. Ensure its reference is 'ref_an' if both fields
are present.
'''
changed = False
dossier = None
reffield = None
for field in [k for k in ('url_an', 'url_sen') if k in data]:
for field in [k for k in ('ref_an', 'ref_sen') if k in data]:
try:
dossier = Dossier.objects.get(reference=data[field])
reffield = field
......@@ -32,50 +53,85 @@ def find_dossier(data):
pass
if dossier is None:
reffield = 'url_an' if 'url_an' in data else 'url_sen'
reffield = 'ref_an' if 'ref_an' in data else 'ref_sen'
dossier = Dossier(reference=data[reffield])
logger.debug('Created dossier %s' % data[reffield])
changed = True
if 'url_an' in data and reffield != 'url_an':
logger.debug('Changed dossier reference to %s' % data['url_an'])
dossier.reference = data['url_an']
if 'ref_an' in data and reffield != 'ref_an':
logger.debug('Changed dossier reference to %s' % data['ref_an'])
dossier.reference = data['ref_an']
changed = True
return dossier, changed
def parse_dossier_data(data):
dossier, changed = find_dossier(data)
def handle_document(dossier, chamber, url):
    """Ensure the 'procedure-file' document of a dossier points at ``url``.

    Looks up the document matching ``chamber``, ``dossier`` and kind
    'procedure-file'; builds a new unsaved one when none exists. The
    document's link is set to ``url`` if it differs, and the document is
    saved only when it was newly built or its link was modified.
    """
    lookup = dict(chamber=chamber, dossier=dossier, kind='procedure-file')

    try:
        doc = Document.objects.get(**lookup)
        needs_save = False
    except Document.DoesNotExist:
        doc = Document(**lookup)
        logger.debug('Created %s document for dossier %s' %
                     (chamber.abbreviation, dossier.title))
        needs_save = True

    if doc.link != url:
        logger.debug('Changing %s url from %s to %s' %
                     (chamber.abbreviation, doc.link, url))
        doc.link = url
        needs_save = True

    # Nothing was created or modified: skip the database write.
    if not needs_save:
        return

    doc.save()
def parse_dossier_data(data, an, sen):
if 'url_an' in data:
ref_an = extract_reference(data['url_an'])
if ref_an is None:
logger.warn('No reference for dossier %s' % data['url_an'])
return
else:
data['ref_an'] = ref_an
if 'url_sen' in data:
ref_sen = extract_reference(data['url_sen'])
if ref_sen is None:
logger.warn('No reference for dossier %s' % data['url_sen'])
return
else:
data['ref_sen'] = ref_sen
thisurl = data['url_an' if data['chambre'] == 'AN' else 'url_sen']
dossier, changed = find_dossier(data)
if dossier.reference != dossier.link:
logger.debug('Changed dossier link to %s' % dossier.reference)
dossier.link = dossier.reference
changed = True
thisref = data['ref_an' if data['chambre'] == 'AN' else 'ref_sen']
title = data['titre']
if dossier.reference == thisurl and dossier.title != title:
if dossier.reference == thisref and dossier.title != title:
logger.debug('Changed dossier title to %s' % title)
dossier.title = title
changed = True
if 'url_an' in data and 'url_sen' in data:
ext_link = data['url_sen']
if dossier.ext_link != ext_link:
logger.debug('Changed dossier ext. link to %s' % ext_link)
dossier.ext_link = ext_link
changed = True
with transaction.atomic():
if changed:
logger.debug('Saved dossier %s' % dossier.reference)
dossier.save()
if 'url_an' in data:
handle_document(dossier, an, data['url_an'])
if changed:
logger.debug('Saved dossier %s' % dossier.reference)
dossier.save()
if 'url_sen' in data:
handle_document(dossier, sen, data['url_sen'])
def main(stream=None):
if not apps.ready:
django.setup()
ensure_chambers()
an = Chamber.objects.get(abbreviation='AN')
sen = Chamber.objects.get(abbreviation='SEN')
for data in ijson.items(stream or sys.stdin, 'item'):
parse_dossier_data(data)
parse_dossier_data(data, an, sen)
......@@ -48,23 +48,16 @@ def _get_unique_title(proposal_pk, candidate):
class ScrutinImporter:
dossiers_ref = None
dossiers_ext = None
dossiers = {}
def get_dossier(self, url):
if self.dossiers_ref is None:
self.dossiers_ref = {
d[0]: d[1] for d in Dossier.objects.values_list('reference',
'pk')
}
if self.dossiers_ext is None:
self.dossiers_ext = {
d[0]: d[1] for d in Dossier.objects.exclude(ext_link='')
.values_list('ext_link', 'pk')
}
if url not in self.dossiers:
try:
self.dossiers[url] = Dossier.objects.get(documents__link=url)
except Dossier.DoesNotExist:
return None
return self.dossiers_ref.get(url, self.dossiers_ext.get(url, None))
return self.dossiers[url]
def parse_scrutin_data(self, data):
ref = data['url']
......@@ -91,7 +84,7 @@ class ScrutinImporter:
values = dict(
title=_get_unique_title(proposal.pk, data["objet"]),
datetime=_parse_date(data["date"]),
dossier_id=dossier,
dossier_id=dossier.pk,
kind='dossier'
)
......
[
{
"fields" : {
"abbreviation": "AN",
"country": 1095,
"name": "Assembl\u00e9e nationale"
},
"model": "representatives.chamber",
"pk": 2
},
{
"fields": {
"abbreviation": "SEN",
"country": 1095,
"name": "S\u00e9nat"
},
"model": "representatives.chamber",
"pk": 3
},
{
"fields": {
"updated": "2016-02-14T13:16:31.417Z",
"reference": "http://www.assemblee-nationale.fr/14/dossiers/liberte_maires_rythmes_scolaires_premier_degre.asp",
"title": "Education : libre choix des maires concernant les rythmes scolaires dans le premier degr\u00e9",
"text": "",
"created": "2016-02-14T13:16:31.417Z",
"link": "http://www.assemblee-nationale.fr/14/dossiers/liberte_maires_rythmes_scolaires_premier_degre.asp",
"ext_link": ""
"updated": "2016-07-07T20:23:24.303Z",
"title": "Education : libre choix des maires concernant les rythmes scolaires dans le premier degr\u00e9",
"reference": "14/liberte_maires_rythmes_scolaires_premier_degre",
"created": "2016-07-07T20:23:24.302Z"
},
"model": "representatives_votes.dossier",
"pk": 1
},
{
"fields": {
"updated": "2016-02-14T13:16:31.428Z",
"reference": "http://www.assemblee-nationale.fr/14/dossiers/action_publique_territoriale_metropoles.asp",
"title": "Collectivit\u00e9s territoriales : action publique territoriale et m\u00e9tropoles",
"text": "",
"created": "2016-02-14T13:16:31.428Z",
"link": "http://www.assemblee-nationale.fr/14/dossiers/action_publique_territoriale_metropoles.asp",
"ext_link": "http://www.senat.fr/dossier-legislatif/pjl12-495.html"
"updated": "2016-07-07T20:23:24.365Z",
"title": "Collectivit\u00e9s territoriales : action publique territoriale et m\u00e9tropoles",
"reference": "14/action_publique_territoriale_metropoles",
"created": "2016-07-07T20:23:24.332Z"
},
"model": "representatives_votes.dossier",
"pk": 2
},
{
"fields": {
"updated": "2016-02-21T14:34:35.721Z",
"reference": "http://www.senat.fr/dossier-legislatif/ppl13-799.html",
"title": "Protection de l'enfant",
"text": "",
"created": "2016-02-21T14:34:35.721Z",
"link": "http://www.senat.fr/dossier-legislatif/ppl13-799.html",
"ext_link": ""
"updated": "2016-07-07T20:23:24.410Z",
"title": "Protection de l'enfant",
"reference": "ppl13-799",
"created": "2016-07-07T20:23:24.410Z"
},
"model": "representatives_votes.dossier",
"pk": 3
},
{
"fields": {
"updated": "2016-07-07T20:23:24.307Z",
"title": "",
"dossier": 1,
"created": "2016-07-07T20:23:24.307Z",
"kind": "procedure-file",
"chamber": 2,
"link": "http://www.assemblee-nationale.fr/14/dossiers/liberte_maires_rythmes_scolaires_premier_degre.asp"
},
"model": "representatives_votes.document",
"pk": 1
},
{
"fields": {
"updated": "2016-07-07T20:23:24.335Z",
"title": "",
"dossier": 2,
"created": "2016-07-07T20:23:24.335Z",
"kind": "procedure-file",
"chamber": 3,
"link": "http://www.senat.fr/dossier-legislatif/pjl12-495.html"
},
"model": "representatives_votes.document",
"pk": 2
},
{
"fields": {
"updated": "2016-07-07T20:23:24.371Z",
"title": "",
"dossier": 2,
"created": "2016-07-07T20:23:24.371Z",
"kind": "procedure-file",
"chamber": 2,
"link": "http://www.assemblee-nationale.fr/14/dossiers/action_publique_territoriale_metropoles.asp"
},
"model": "representatives_votes.document",
"pk": 3
},
{
"fields": {
"updated": "2016-07-07T20:23:24.415Z",
"title": "",
"dossier": 3,
"created": "2016-07-07T20:23:24.415Z",
"kind": "procedure-file",
"chamber": 3,
"link": "http://www.senat.fr/dossier-legislatif/ppl13-799.html"
},
"model": "representatives_votes.document",
"pk": 4
}
]
......@@ -6,8 +6,10 @@ import urllib2
import ijson
import django
from django.apps import apps
from django.db import transaction
from representatives_votes.models import Dossier
from representatives.models import Chamber
from representatives_votes.models import Dossier, Document
from .import_votes import Command
logger = logging.getLogger(__name__)
......@@ -17,38 +19,51 @@ URL = 'http://parltrack.euwiki.org/dumps/ep_dossiers.json.xz'
LOCAL_PATH = 'ep_dossiers.json.xz'
def parse_dossier_data(data):
def parse_dossier_data(data, ep):
"""Parse data from a parltrack dossier export (one dossier). Update the
dossier if it already exists. The goal of this function is to import and
update a dossier, not to import all parltrack data.
"""
changed = False
doc_changed = False
ref = data['procedure']['reference']
logger.debug('Processing dossier %s', ref)
try:
dossier = Dossier.objects.get(reference=ref)
except Dossier.DoesNotExist:
dossier = Dossier(reference=ref)
logger.debug('Dossier did not exist')
changed = True
if dossier.title != data['procedure']['title']:
logger.debug('Title changed from "%s" to "%s"', dossier.title,
data['procedure']['title'])
dossier.title = data['procedure']['title']
changed = True
source = data['meta']['source'].replace('&l=en', '')
if dossier.link != source:
logger.debug('Source changed from "%s" to "%s"', dossier.link, source)
dossier.link = source
changed = True
if changed:
logger.info('Updated dossier %s', ref)
dossier.save()
with transaction.atomic():
try:
dossier = Dossier.objects.get(reference=ref)
except Dossier.DoesNotExist:
dossier = Dossier(reference=ref)
logger.debug('Dossier did not exist')
changed = True
if dossier.title != data['procedure']['title']:
logger.debug('Title changed from "%s" to "%s"', dossier.title,
data['procedure']['title'])
dossier.title = data['procedure']['title']
changed = True
if changed:
logger.info('Updated dossier %s', ref)
dossier.save()
source = data['meta']['source'].replace('&l=en', '')
try:
doc = Document.objects.get(dossier=dossier, kind='procedure-file')
except Document.DoesNotExist:
doc = Document(dossier=dossier, kind='procedure-file', chamber=ep)
logger.debug('Document for dossier %s did not exist', ref)
doc_changed = True
if doc.link != source:
logger.debug('Link changed from %s to %s', doc.link, source)
doc.link = source
doc_changed = True
if doc_changed:
logger.info('Updated document %s for dossier %s', doc.link, ref)
doc.save()
if 'votes' in data.keys() and 'epref' in data['votes']:
command = Command()
......@@ -68,13 +83,15 @@ def import_single(stream):
if not apps.ready:
django.setup()
ep = Chamber.objects.get(abbreviation='EP')
for data in ijson.items(stream, ''):
parse_dossier_data(data)
parse_dossier_data(data, ep)
def main(stream=None):
if not apps.ready:
django.setup()
ep = Chamber.objects.get(abbreviation='EP')
for data in ijson.items(stream or sys.stdin, 'item'):
parse_dossier_data(data)
parse_dossier_data(data, ep)
[
{
"fields": {
"updated": "2015-12-13T10:11:31.369Z",
"reference": "2012/2002(INI)",
"title": "Agenda for change: the future of EU development policy",
"text": "",
"created": "2015-12-13T10:11:31.369Z",
"link": "http://www.europarl.europa.eu/oeil/popups/ficheprocedure.do?reference=2012/2002(INI)"
"updated": "2016-07-08T05:17:40.580Z",
"title": "Agenda for change: the future of EU development policy",
"reference": "2012/2002(INI)",
"created": "2016-07-08T05:17:40.580Z"
},
"model": "representatives_votes.dossier",
"pk": 1
},
{
"fields": {
"updated": "2015-12-13T10:11:31.378Z",
"reference": "2015/2132(BUD)",
"title": "2016 general budget: all sections",
"text": "",
"created": "2015-12-13T10:11:31.378Z",
"link": "http://www.europarl.europa.eu/oeil/popups/ficheprocedure.do?reference=2015/2132(BUD)"
"updated": "2016-07-08T05:17:40.617Z",
"title": "2016 general budget: all sections",
"reference": "2015/2132(BUD)",
"created": "2016-07-08T05:17:40.616Z"
},
"model": "representatives_votes.dossier",
"pk": 2
},
{
"fields": {
"updated": "2015-12-13T10:11:31.388Z",
"reference": "2013/2857(DEA)",
"title": "Scheme of control and enforcement applicable in the area covered by the Convention on future multilateral cooperation in the North-East Atlantic fisheries",
"text": "",
"created": "2015-12-13T10:11:31.388Z",
"link": "http://www.europarl.europa.eu/oeil/popups/ficheprocedure.do?reference=2013/2857(DEA)"
"updated": "2016-07-08T05:17:40.644Z",
"title": "Scheme of control and enforcement applicable in the area covered by the Convention on future multilateral cooperation in the North-East Atlantic fisheries",
"reference": "2013/2857(DEA)",
"created": "2016-07-08T05:17:40.644Z"
},
"model": "representatives_votes.dossier",
"pk": 3
},
{
"fields": {
"updated": "2015-12-13T10:11:31.398Z",
"reference": "2015/2623(DEA)",
"title": "Scheme of control and enforcement applicable in the area covered by the Convention on future multilateral cooperation in the North-East Atlantic fisheries",
"text": "",
"created": "2015-12-13T10:11:31.398Z",
"link": "http://www.europarl.europa.eu/oeil/popups/ficheprocedure.do?reference=2015/2623(DEA)"
"updated": "2016-07-08T05:17:40.682Z",
"title": "Scheme of control and enforcement applicable in the area covered by the Convention on future multilateral cooperation in the North-East Atlantic fisheries",
"reference": "2015/2623(DEA)",
"created": "2016-07-08T05:17:40.682Z"
},
"model": "representatives_votes.dossier",
"pk": 4
},
{
"fields": {
"updated": "2015-12-13T10:11:31.408Z",
"reference": "2009/0051(COD)",
"title": "Scheme of control and enforcement applicable in the area covered by the Convention on future multilateral cooperation in the North-East Atlantic fisheries",
"text": "",
"created": "2015-12-13T10:11:31.408Z",
"link": "http://www.europarl.europa.eu/oeil/popups/ficheprocedure.do?reference=2009/0051(COD)"
"updated": "2016-07-08T05:17:40.719Z",
"title": "Scheme of control and enforcement applicable in the area covered by the Convention on future multilateral cooperation in the North-East Atlantic fisheries",
"reference": "2009/0051(COD)",
"created": "2016-07-08T05:17:40.719Z"
},
"model": "representatives_votes.dossier",
"pk": 5
},
{
"fields": {
"updated": "2016-07-08T05:17:40.582Z",
"title": "",
"dossier": 1,
"created": "2016-07-08T05:17:40.582Z",
"kind": "procedure-file",
"chamber": 1,
"link": "http://www.europarl.europa.eu/oeil/popups/ficheprocedure.do?reference=2012/2002(INI)"
},
"model": "representatives_votes.document",
"pk": 1
},
{
"fields": {
"updated": "2016-07-08T05:17:40.619Z",
"title": "",
"dossier": 2,
"created": "2016-07-08T05:17:40.619Z",
"kind": "procedure-file",
"chamber": 1,
"link": "http://www.europarl.europa.eu/oeil/popups/ficheprocedure.do?reference=2015/2132(BUD)"
},
"model": "representatives_votes.document",
"pk": 2
},
{
"fields": {
"updated": "2016-07-08T05:17:40.646Z",
"title": "",
"dossier": 3,
"created": "2016-07-08T05:17:40.646Z",
"kind": "procedure-file",
"chamber": 1,
"link": "http://www.europarl.europa.eu/oeil/popups/ficheprocedure.do?reference=2013/2857(DEA)"
},
"model": "representatives_votes.document",
"pk": 3
},
{
"fields": {
"updated": "2016-07-08T05:17:40.684Z",
"title": "",
"dossier": 4,
"created": "2016-07-08T05:17:40.684Z",
"kind": "procedure-file",