import_dossiers.py 3.86 KB
Newer Older
Nicolas Joyard's avatar
Nicolas Joyard committed
1
2
3
4
5
# coding: utf-8

import sys
import ijson
import logging
6
import re
Nicolas Joyard's avatar
Nicolas Joyard committed
7
8
9

import django
from django.apps import apps
10
from django.db import transaction
Nicolas Joyard's avatar
Nicolas Joyard committed
11

12
13
14
15
from representatives.contrib.francedata.import_representatives import \
    ensure_chambers
from representatives.models import Chamber
from representatives_votes.models import Document, Dossier
Nicolas Joyard's avatar
Nicolas Joyard committed
16
17
18
19

logger = logging.getLogger(__name__)


20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def extract_reference(url):
    '''
    Derive a dossier reference from a dossier page URL.

    Three URL shapes are recognised and tried in this order:

    * ``.../dossier-legislatif/<name>.html``  -> ``<name>``
    * ``.../<digits>/dossiers/<name>.asp``    -> ``<digits>/<name>``
    * ``.../dossiers/<name>.asp``             -> ``<name>``

    Returns the reference string, or None when no shape matches.
    '''
    # (regex, output template) pairs; the template is filled with the
    # regex's captured groups.  Order matters: the numbered '/dossiers/'
    # form must be tried before the bare one, which would also match it.
    rules = (
        (r'/dossier-legislatif/([^./]+)\.html', '%s'),
        (r'/(\d+)/dossiers/([^./]+)\.asp', '%s/%s'),
        (r'/dossiers/([^./]+)\.asp', '%s'),
    )

    for pattern, template in rules:
        found = re.search(pattern, url)
        if found is not None:
            return template % found.groups()

    return None


36
37
def find_dossier(data):
    '''
    Find the dossier whose reference matches 'ref_an' or 'ref_sen' in
    `data`, building a new one when neither matches.

    Keys absent from `data` are skipped; 'ref_an' is tried before
    'ref_sen'.  When no dossier is found, a new, unsaved Dossier is built
    using 'ref_an' if present, else 'ref_sen'.  When 'ref_an' is present
    but the dossier was found through 'ref_sen', its reference is switched
    to 'ref_an'.

    Nothing is saved here.  Returns a ``(dossier, changed)`` tuple where
    `changed` is True when the dossier is new or its reference was
    modified, i.e. when the caller needs to save it.

    Raises KeyError when `data` holds neither 'ref_an' nor 'ref_sen'.
    '''

    dossier = None
    reffield = None

    for field in [k for k in ('ref_an', 'ref_sen') if k in data]:
        try:
            dossier = Dossier.objects.get(reference=data[field])
        except Dossier.DoesNotExist:
            continue
        reffield = field
        break

    changed = False

    if dossier is None:
        reffield = 'ref_an' if 'ref_an' in data else 'ref_sen'
        dossier = Dossier(reference=data[reffield])
        logger.debug('Created dossier %s', data[reffield])
        changed = True

    # The 'ref_an' reference takes precedence whenever it is available.
    if 'ref_an' in data and reffield != 'ref_an':
        logger.debug('Changed dossier reference to %s', data['ref_an'])
        dossier.reference = data['ref_an']
        changed = True

    return dossier, changed


69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
def handle_document(dossier, chamber, url):
    '''
    Make sure `dossier` has a 'procedure-file' document for `chamber`
    whose link is `url`.

    The existing document for this (chamber, dossier, kind) is fetched, or
    a new one is built when none exists.  Its link is updated when it
    differs from `url`.  The document is saved only when it was created or
    its link changed.
    '''
    lookup = dict(chamber=chamber, dossier=dossier, kind='procedure-file')
    doc_changed = False

    try:
        doc = Document.objects.get(**lookup)
    except Document.DoesNotExist:
        doc = Document(**lookup)
        logger.debug('Created %s document for dossier %s',
                     chamber.abbreviation, dossier.title)
        doc_changed = True

    # Also runs for a freshly built document, whose link is whatever the
    # model initialises it to.
    if doc.link != url:
        logger.debug('Changing %s url from %s to %s',
                     chamber.abbreviation, doc.link, url)
        doc.link = url
        doc_changed = True

    if doc_changed:
        doc.save()


def parse_dossier_data(data, an, sen):
    '''
    Import one dossier record.

    `data` is a dict for a single dossier; the keys read here are
    'url_an', 'url_sen' (both optional), 'chambre' and 'titre'.  `an` and
    `sen` are the Chamber objects the 'url_an' / 'url_sen' documents are
    attached to.

    For each URL present, a reference is extracted and stored back into
    `data` as 'ref_an' / 'ref_sen' (so `data` is mutated).  If a reference
    cannot be extracted from a URL, a warning is logged and the record is
    skipped entirely.

    The dossier is then found or built, its title is updated when
    appropriate, and the dossier and its per-chamber documents are saved
    inside a single transaction.
    '''
    for url_key, ref_key in (('url_an', 'ref_an'), ('url_sen', 'ref_sen')):
        if url_key not in data:
            continue

        reference = extract_reference(data[url_key])
        if reference is None:
            logger.warning('No reference for dossier %s', data[url_key])
            return

        data[ref_key] = reference

    dossier, changed = find_dossier(data)

    # Reference of the dossier as seen from the chamber named in the record.
    thisref = data['ref_an' if data['chambre'] == 'AN' else 'ref_sen']

    # The title is only taken from this record when the dossier's reference
    # is this chamber's reference.
    title = data['titre']
    if dossier.reference == thisref and dossier.title != title:
        logger.debug('Changed dossier title to %s', title)
        dossier.title = title
        changed = True

    with transaction.atomic():
        if changed:
            dossier.save()
            logger.debug('Saved dossier %s', dossier.reference)

        if 'url_an' in data:
            handle_document(dossier, an, data['url_an'])

        if 'url_sen' in data:
            handle_document(dossier, sen, data['url_sen'])
Nicolas Joyard's avatar
Nicolas Joyard committed
127
128
129
130
131
132


def main(stream=None):
    '''
    Import every dossier from a JSON stream.

    `stream` is a file-like object from which ijson reads the items under
    the 'item' prefix; standard input is used when it is omitted.

    Django is set up first if the app registry is not ready yet.  A
    failure on one record is logged with its traceback and does not stop
    the import of the remaining records.
    '''
    if not apps.ready:
        django.setup()

    ensure_chambers()
    an = Chamber.objects.get(abbreviation='AN')
    sen = Chamber.objects.get(abbreviation='SEN')

    # ijson is meant to be fed bytes: use the binary buffer behind stdin
    # when there is one, and fall back to sys.stdin itself otherwise.
    source = stream or getattr(sys.stdin, 'buffer', sys.stdin)

    for data in ijson.items(source, 'item'):
        try:
            parse_dossier_data(data, an, sen)
        except Exception:
            # Best effort: report the bad record and carry on.
            logger.exception('error trying to import dossier %s', data)