import_dossiers.py 2.97 KB
Newer Older
1
2
3
# coding: utf-8
import logging
import sys
4
import urllib2
5
6
7
8

import ijson
import django
from django.apps import apps
9
from django.db import transaction
10

11
12
from representatives.models import Chamber
from representatives_votes.models import Dossier, Document
13
from .import_votes import Command
14
15
16

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Root of the Parltrack site; note that it already ends with a slash.
BASEURL = 'http://parltrack.euwiki.org/'

# Location of the compressed dump of all EP dossiers, and the file name used
# for a local copy of it.
LOCAL_PATH = 'ep_dossiers.json.xz'
URL = BASEURL + 'dumps/' + LOCAL_PATH


22
def parse_dossier_data(data, ep):
    """Create or update one dossier from a single Parltrack dossier export.

    Only the dossier title and the link of its 'procedure-file' document are
    kept in sync; this is not a full import of the Parltrack data. If the
    export carries vote data with an 'epref' key, that vote data is handed
    to the vote import command afterwards.

    :param data: decoded JSON mapping for one dossier. It must provide
        ``data['procedure']['reference']``, ``data['procedure']['title']``
        and ``data['meta']['source']``; ``data['votes']`` is optional.
    :param ep: chamber assigned to a newly created procedure-file document.
    :raises KeyError: if one of the required keys is missing from ``data``.
    """
    procedure = data['procedure']
    ref = procedure['reference']
    title = procedure['title']

    changed = False
    doc_changed = False

    logger.debug('Processing dossier %s', ref)

    # Dossier and document are written together so that a failure on the
    # document does not leave a half-imported dossier behind.
    with transaction.atomic():
        try:
            dossier = Dossier.objects.get(reference=ref)
        except Dossier.DoesNotExist:
            dossier = Dossier(reference=ref)
            logger.debug('Dossier did not exist')
            changed = True

        if dossier.title != title:
            logger.debug('Title changed from "%s" to "%s"', dossier.title,
                         title)
            dossier.title = title
            changed = True

        # Only hit the database when something actually differs.
        if changed:
            logger.info('Updated dossier %s', ref)
            dossier.save()

        # The stored link is the source URL without its '&l=en' parameter.
        source = data['meta']['source'].replace('&l=en', '')
        try:
            doc = Document.objects.get(dossier=dossier, kind='procedure-file')
        except Document.DoesNotExist:
            doc = Document(dossier=dossier, kind='procedure-file', chamber=ep)
            logger.debug('Document for dossier %s did not exist', ref)
            doc_changed = True

        if doc.link != source:
            logger.debug('Link changed from %s to %s', doc.link, source)
            doc.link = source
            doc_changed = True

        if doc_changed:
            logger.info('Updated document %s for dossier %s', doc.link, ref)
            doc.save()

    # Vote data is processed outside of the dossier transaction.
    if 'votes' in data and 'epref' in data['votes']:
        command = Command()
        command.init_cache()
        command.parse_vote_data(data['votes'])


def sync_dossier(reference):
    """Download a single dossier from Parltrack and import it.

    :param reference: dossier reference, inserted as-is into the URL
        ``<BASEURL>/dossier/<reference>?format=json``.
    """
    # Local import: contextlib is not imported at module level.
    from contextlib import closing

    # BASEURL ends with a slash; strip it so the path does not start with
    # a double slash.
    url = '%s/dossier/%s?format=json' % (BASEURL.rstrip('/'), reference)

    logger.debug('Syncing dossier from %s', url)
    # urllib2 responses do not implement the context manager protocol on
    # Python 2, so closing() is what guarantees the response gets closed.
    with closing(urllib2.urlopen(url)) as stream:
        import_single(stream)


def import_single(stream):
    """Import the dossier(s) found at the root of a JSON stream.

    :param stream: file-like object passed to ``ijson.items`` with an empty
        prefix; every item it yields is handed to ``parse_dossier_data``.
    """
    # Set Django up first when the app registry is not ready yet.
    if not apps.ready:
        django.setup()

    chamber = Chamber.objects.get(abbreviation='EP')
    dossiers = ijson.items(stream, '')
    for dossier_data in dossiers:
        parse_dossier_data(dossier_data, chamber)
89

90
91
92
93
94

def main(stream=None):
    """Import every dossier from a JSON array, reading stdin by default.

    A dossier that fails to import is logged with its traceback and skipped,
    so one bad entry does not stop the rest of the import.

    :param stream: file-like object holding a JSON array of dossiers;
        ``sys.stdin`` is used when it is omitted or falsy.
    """
    # Set Django up first when the app registry is not ready yet.
    if not apps.ready:
        django.setup()

    chamber = Chamber.objects.get(abbreviation='EP')
    source = stream or sys.stdin
    for dossier_data in ijson.items(source, 'item'):
        try:
            parse_dossier_data(dossier_data, chamber)
        except Exception:
            logger.exception('error trying to import dossier %s',
                             str(dossier_data))