import_dossiers.py 2.97 KB
Newer Older
1 2 3
# coding: utf-8
import logging
import sys
4
import urllib2
5 6 7 8

import ijson
import django
from django.apps import apps
9
from django.db import transaction
10

11 12
from representatives.models import Chamber
from representatives_votes.models import Dossier, Document
13
from .import_votes import Command
14 15 16

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Root URL of the parltrack site; note the trailing slash.
BASEURL = 'http://parltrack.euwiki.org/'
# Location of the dossiers dump and a local file name for it. Neither is
# referenced in the visible part of this module -- presumably used by
# callers or tooling elsewhere; confirm before removing.
URL = 'http://parltrack.euwiki.org/dumps/ep_dossiers.json.xz'
LOCAL_PATH = 'ep_dossiers.json.xz'


22
def parse_dossier_data(data, ep):
    """Import or update one dossier from a parltrack dossier export.

    Only the dossier record and its 'procedure-file' document are kept in
    sync; the goal is to import and update a dossier, not to import all
    parltrack data.

    :param data: parsed JSON of a single dossier. ``data['procedure']`` must
        hold ``reference`` and ``title``, and ``data['meta']['source']`` the
        procedure URL; a missing key raises ``KeyError``.
    :param ep: chamber assigned to the 'procedure-file' document when that
        document has to be created.
    """
    changed = False
    doc_changed = False
    procedure = data['procedure']
    ref = procedure['reference']

    logger.debug('Processing dossier %s', ref)

    # Dossier and document are written in one transaction so a failure on
    # the document does not leave a half-imported dossier behind.
    with transaction.atomic():
        try:
            dossier = Dossier.objects.get(reference=ref)
        except Dossier.DoesNotExist:
            dossier = Dossier(reference=ref)
            logger.debug('Dossier did not exist')
            changed = True

        title = procedure['title']
        if dossier.title != title:
            logger.debug('Title changed from "%s" to "%s"', dossier.title,
                         title)
            dossier.title = title
            changed = True

        # Only hit the database when something actually differs.
        if changed:
            logger.info('Updated dossier %s', ref)
            dossier.save()

        # The '&l=en' fragment is dropped from the source URL before it is
        # compared with, and stored as, the document link.
        source = data['meta']['source'].replace('&l=en', '')
        try:
            doc = Document.objects.get(dossier=dossier, kind='procedure-file')
        except Document.DoesNotExist:
            doc = Document(dossier=dossier, kind='procedure-file', chamber=ep)
            logger.debug('Document for dossier %s did not exist', ref)
            doc_changed = True

        if doc.link != source:
            logger.debug('Link changed from %s to %s', doc.link, source)
            doc.link = source
            doc_changed = True

        if doc_changed:
            logger.info('Updated document %s for dossier %s', doc.link, ref)
            doc.save()

    # Vote data is handed to the import_votes command, outside the
    # transaction above. A fresh Command (with its cache initialised) is
    # built for every dossier that carries votes.
    if 'votes' in data and 'epref' in data['votes']:
        command = Command()
        command.init_cache()
        command.parse_vote_data(data['votes'])


def sync_dossier(reference):
    """Download one dossier from parltrack as JSON and import it.

    :param reference: dossier reference, interpolated into the URL as given.
    """
    # BASEURL already ends with a slash; strip it so the path does not
    # start with '//dossier/'.
    url = '%s/dossier/%s?format=json' % (BASEURL.rstrip('/'), reference)

    logger.debug('Syncing dossier from %s', url)
    # urllib2 responses do not implement the context-manager protocol, so
    # the stream is closed explicitly instead of via a `with` statement.
    stream = urllib2.urlopen(url)
    try:
        import_single(stream)
    finally:
        stream.close()


def import_single(stream):
    """Import the dossier(s) found at the top level of a JSON stream.

    Unlike ``main``, errors raised while parsing a dossier are not caught
    here and propagate to the caller.

    :param stream: file-like object containing the JSON of one dossier.
    """
    # Allow use outside a configured Django process (e.g. as a script).
    if not apps.ready:
        django.setup()

    ep = Chamber.objects.get(abbreviation='EP')
    # The empty prefix makes ijson yield the top-level JSON value itself.
    for data in ijson.items(stream, ''):
        parse_dossier_data(data, ep)
89

90 91 92 93 94

def main(stream=None):
    """Import every dossier from a JSON array stream.

    A dossier that fails to import is logged with its traceback and
    skipped, so one bad entry does not abort the whole run.

    :param stream: file-like object holding a JSON array of dossiers;
        falls back to ``sys.stdin`` when omitted or falsy.
    """
    # Allow use outside a configured Django process (e.g. as a script).
    if not apps.ready:
        django.setup()

    ep = Chamber.objects.get(abbreviation='EP')
    # The 'item' prefix makes ijson yield each element of the top-level
    # array one at a time instead of loading the whole dump.
    for data in ijson.items(stream or sys.stdin, 'item'):
        try:
            parse_dossier_data(data, ep)
        except Exception:
            # str() is applied up front on purpose: logging treats a lone
            # dict argument as a mapping of named format arguments.
            logger.exception('error trying to import dossier %s', str(data))