# import_representatives.py
# coding: utf-8

import logging
import sys
from datetime import datetime

import django
import django.dispatch
import ijson
from django.apps import apps
from django.db import transaction
from django.utils import timezone
from django.utils.text import slugify

from representatives.models import (Address, Chamber, Constituency, Country,
                                    Email, Group, Mandate, Phone,
                                    Representative, WebSite)

logger = logging.getLogger(__name__)

# Sent by ParltrackImporter.manage_mep before a MEP is imported, with the raw
# parltrack dict passed as ``representative_data``. When any receiver returns
# False the importer skips that MEP.
representative_pre_import = django.dispatch.Signal(
    providing_args=['representative_data'])


def _parse_date(date):
    return datetime.strptime(date, "%Y-%m-%dT00:%H:00").date()


class GenericImporter(object):
    '''
    Base class for importers: it remembers when an import started so that
    rows seen during the import can be told apart from the ones left
    untouched
    '''

    def pre_import(self):
        '''
        Record the current time as the beginning of the import
        '''
        self.import_start_datetime = timezone.now()

    def post_import(self):
        '''
        Delete every row whose updated field is older than the beginning
        of the import, for each of the imported models
        '''
        imported_models = (Representative, Group, Constituency,
                           Mandate, Address, Phone, Email, WebSite)
        for imported_model in imported_models:
            untouched = imported_model.objects.filter(
                updated__lt=self.import_start_datetime)
            untouched.delete()

    def touch_model(self, model, **data):
        '''
        Look up the row of the given model matching data, creating it when
        it does not exist

        A row that already existed and has not been updated since the
        beginning of the import is saved again. Returns the
        (instance, created) pair.
        '''
        instance, created = model.objects.get_or_create(**data)

        # A freshly created row is never saved a second time.
        is_stale = (not created and
                    instance.updated < self.import_start_datetime)
        if is_stale:
            instance.save()     # Updates updated field

        return (instance, created)


class ParltrackImporter(GenericImporter):
    '''
    Import MEPs described by parltrack JSON dicts as representatives,
    together with their mandates and their contact details
    '''

    # Neither attribute is read by the code of this class.
    url = 'http://parltrack.euwiki.org/dumps/ep_meps_current.json.xz'
    check_etag = True

    # Political group abbreviations found in the data, mapped to the ones
    # stored on the Group rows.
    _GROUP_ABBREVIATIONS = {
        "S&D": "SD",
        "NA": "NI",
        "ID": "IND/DEM",
        "PPE": "EPP",
        "Verts/ALE": "Greens/EFA",
    }

    # Values stored in Representative.gender, 0 is used for anything else.
    _GENDER_CODES = {u"F": 1, u"M": 2}

    # Last names to use for the given full names, overriding the family
    # name found in the data.
    _LAST_NAME_FIXES = {
        "Esther de LANGE": "de LANGE",
        "Patricia van der KAMMEN": "van der KAMMEN",
        "Judith A. MERKIES": "MERKIES",
        "Heinz K. BECKER": "BECKER",
        "Cornelis de JONG": "de JONG",
        "Peter van DALEN": "van DALEN",
        "Sophia in 't VELD": "in 't VELD",
        "Marielle de SARNEZ": "de SARNEZ",
        "Anne E. JENSEN": "JENSEN",
        "Wim van de CAMP": "van de CAMP",
        "Lambert van NISTELROOIJ": "van NISTELROOIJ",
        "Johannes Cornelis van BAALEN": "van BAALEN",
        "Ioannis A. TSOUKALAS": "TSOUKALAS",
        "Pilar del CASTILLO VERA": "del CASTILLO VERA",
        "Luis de GRANDES PASCUAL": "de GRANDES PASCUAL",
        "Philippe de VILLIERS": "de VILLIERS",
        "Daniël van der STOEP": "van der STOEP",
        "William (The Earl of) DARTMOUTH": "(The Earl of) Dartmouth",
        "Bairbre de BRÚN": u'de Br\xfan',
        "Karl von WOGAU": u'von WOGAU',
        "Ieke van den BURG": u'van den BURG',
        "Manuel António dos SANTOS": u'dos SANTOS',
        "Paul van BUITENEN": u'van BUITENEN',
        "Elly de GROEN-KOUWENHOVEN": u'de GROEN-KOUWENHOVEN',
        "Margrietus van den BERG": u'van den BERG',
        u'Dani\xebl van der STOEP': u'van der STOEP',
        "Alexander Graf LAMBSDORFF": u'Graf LAMBSDORFF',
        u'Bairbre de BR\xdaN': u'de BR\xdaN',
        'Luigi de MAGISTRIS': 'de MAGISTRIS',
    }

    def parse_date(self, date):
        '''
        Convert a parltrack timestamp string into a date
        '''
        return _parse_date(date)

    def __init__(self):
        '''
        Cache the primary keys of the countries by name and make sure the
        European Parliament chamber, constituency and group exist
        '''
        self.cache = {
            'countries': {c.name: c.pk for c in Country.objects.all()},
        }
        self.ep_chamber, _ = Chamber.objects.get_or_create(
            name='European Parliament', abbreviation='EP')
        self.ep_constituency, _ = Constituency.objects.get_or_create(
            name='European Parliament')
        self.ep_group, _ = Group.objects.get_or_create(
            name='European Parliament', kind='chamber', abbreviation='EP',
            chamber=self.ep_chamber)

    @staticmethod
    def _full_name(mep_json):
        '''
        Return the full name of the mep, built from the first and family
        names when the data has no full name
        '''
        name = mep_json["Name"]
        if 'full' in name:
            return name["full"]
        return name["sur"] + " " + name["family"]

    @staticmethod
    def _set_if_changed(instance, field, value):
        '''
        Assign value to the field of instance when it differs from the
        current one, return whether an assignment was made
        '''
        if getattr(instance, field) != value:
            setattr(instance, field, value)
            return True
        return False

    @transaction.atomic
    def manage_mep(self, mep_json):
        '''
        Import a mep as a representative from the json dict fetched from
        parltrack

        Returns the representative, or None when a receiver of
        representative_pre_import asked to skip this mep.
        '''
        # Some versions of memopol will connect to this and skip inactive meps.
        responses = representative_pre_import.send(
            sender=self, representative_data=mep_json)

        for _receiver, response in responses:
            if response is False:
                logger.debug('Skipping MEP %s', self._full_name(mep_json))
                return None

        # NOTE(review): the birth date is required here although
        # import_representative_details treats "Birth" as optional; a mep
        # without it raises KeyError - confirm this is intended.
        slug = slugify('%s-%s' % (
            self._full_name(mep_json),
            _parse_date(mep_json["Birth"]["date"])
        ))

        changed = False
        try:
            representative = Representative.objects.get(slug=slug)
        except Representative.DoesNotExist:
            representative = Representative(slug=slug)
            changed = True      # Forces the first save

        # Save representative attributes
        self.import_representative_details(representative, mep_json, changed)

        self.add_mandates(representative, mep_json)

        self.add_contacts(representative, mep_json)

        # The logger converts the representative to text only when the
        # debug level is enabled.
        logger.debug('Imported MEP %s', representative)

        return representative

    def import_representative_details(self, representative, mep_json, changed):
        '''
        Copy the personal details of the mep onto the representative and
        save it when changed is true or when any attribute was modified
        '''
        update = self._set_if_changed

        changed = update(
            representative, 'active', mep_json['active']) or changed

        birth = mep_json.get("Birth")
        if birth:
            changed = update(representative, 'birth_date',
                             _parse_date(birth["date"])) or changed
            if "place" in birth:
                changed = update(representative, 'birth_place',
                                 birth["place"]) or changed

        changed = update(representative, 'first_name',
                         mep_json["Name"]["sur"]) or changed

        changed = update(representative, 'full_name',
                         self._full_name(mep_json)) or changed

        changed = update(
            representative, 'photo', mep_json["Photo"]) or changed

        # The full name has been updated above, it selects the fix to use.
        last_name = mep_json["Name"]["family"]
        fixed_last_name = self._LAST_NAME_FIXES.get(representative.full_name)
        if fixed_last_name:
            last_name = fixed_last_name
        elif last_name == "J.A.J. STASSEN":
            last_name = "STASSEN"

        changed = update(representative, 'last_name', last_name) or changed

        gender = self._GENDER_CODES.get(mep_json.get('Gender'), 0)
        changed = update(representative, 'gender', gender) or changed

        cv = "\n".join(mep_json.get("CV", []))
        changed = update(representative, 'cv', cv) or changed

        if changed:
            representative.save()

    def _create_mandate(self, mandate_data, representative, group,
                        constituency):
        '''
        Get or create the mandate of the representative in the group for
        the constituency, with the role and dates found in mandate_data
        '''
        # Both dates default to None so that a mandate without "start" or
        # "end" no longer fails on an unassigned variable.
        # NOTE(review): assumes the Mandate date fields accept NULL - confirm.
        begin_date = None
        end_date = None
        if mandate_data.get("start"):
            begin_date = _parse_date(mandate_data["start"])
        if mandate_data.get("end"):
            end_date = _parse_date(mandate_data["end"])

        mandate, created = Mandate.objects.get_or_create(
            representative=representative,
            group=group,
            constituency=constituency,
            role=mandate_data.get('role', ''),
            begin_date=begin_date,
            end_date=end_date
        )

        if created:
            logger.debug('Created mandate %s with %s', mandate.pk,
                         mandate_data)

    def add_mandates(self, representative, mep_json):
        '''
        Create the groups and mandates of the representative from the
        Committees, Delegations, Groups, Constituencies and Staff entries
        of the mep
        '''
        # Committee
        for mandate_data in mep_json.get('Committees', []):
            if mandate_data.get("committee_id"):
                group, _ = self.touch_model(
                    model=Group,
                    abbreviation=mandate_data['committee_id'],
                    kind='committee',
                    name=mandate_data['Organization'],
                    chamber=self.ep_chamber)

                self._create_mandate(mandate_data, representative, group,
                                     self.ep_constituency)

        # Delegations
        for mandate_data in mep_json.get('Delegations', []):
            group, _ = self.touch_model(
                model=Group,
                kind='delegation',
                name=mandate_data['Organization'],
                chamber=self.ep_chamber)

            self._create_mandate(mandate_data, representative, group,
                                 self.ep_constituency)

        # Group
        for mandate_data in mep_json.get('Groups', []):
            group_id = mandate_data.get('groupid')
            if not group_id:
                continue

            # Only the first id is used when several are given.
            if isinstance(group_id, list):
                abbreviation = group_id[0]
            else:
                abbreviation = group_id

            abbreviation = self._GROUP_ABBREVIATIONS.get(
                abbreviation, abbreviation)
            group, _ = self.touch_model(
                model=Group,
                abbreviation=abbreviation,
                kind='group',
                name=mandate_data['Organization'],
                chamber=self.ep_chamber)

            self._create_mandate(mandate_data, representative, group,
                                 self.ep_constituency)

        # Countries
        for mandate_data in mep_json.get('Constituencies', []):
            if not mandate_data:
                continue

            country = Country.objects.get(name=mandate_data['country'])

            group, _ = self.touch_model(
                model=Group,
                abbreviation=country.code,
                kind='country',
                name=country.name)

            # A missing, empty or '-' party is stored as 'unknown'.
            local_party = mandate_data.get('party')
            if not local_party or local_party == '-':
                local_party = 'unknown'

            save_constituency = False
            try:
                constituency = Constituency.objects.get(name=local_party)
            except Constituency.DoesNotExist:
                constituency = Constituency(name=local_party)
                save_constituency = True

            # The country fetched above is used instead of the cache built
            # in __init__, which does not know countries created since.
            if constituency.country_id != country.pk:
                constituency.country_id = country.pk
                save_constituency = True

            if save_constituency:
                constituency.save()

            self._create_mandate(mandate_data, representative, group,
                                 constituency)

            # Each constituency also gives a mandate in the parliament.
            self._create_mandate(mandate_data, representative, self.ep_group,
                                 self.ep_constituency)

        # Organisations
        for mandate_data in mep_json.get('Staff', []):
            group, _ = self.touch_model(
                model=Group,
                abbreviation='',
                kind='organization',
                name=mandate_data['Organization'])

            self._create_mandate(mandate_data, representative, group,
                                 self.ep_constituency)

    def add_contacts(self, representative, mep_json):
        '''
        Create the addresses, phones, emails and websites of the
        representative from the contact entries of the mep
        '''
        # Addresses
        addresses = mep_json.get('Addresses', None)
        if addresses:
            belgium = Country.objects.get(name="Belgium")
            france = Country.objects.get(name="France")

            # (country, street, number, postcode, name) of the parliament
            # buildings, other cities are ignored.
            offices = {
                'Brussels': (
                    belgium, u"rue Wiertz / Wiertzstraat", '60', '1047',
                    "Brussels European Parliament"),
                'Strasbourg': (
                    france, u"Av. du Président Robert Schuman - CS 91024",
                    '1', '67070', "Strasbourg European Parliament"),
            }

            for city in addresses:
                if city not in offices:
                    continue

                country, street, number, postcode, name = offices[city]
                # The first three characters are stored as the floor, the
                # remaining ones as the office number.
                office = addresses[city]['Address']['Office']

                address_model, _ = self.touch_model(
                    model=Address,
                    representative=representative, country=country,
                    city=city,
                    floor=office[:3],
                    office_number=office[3:],
                    street=street, number=number, postcode=postcode,
                    kind='official', name=name)

                self.touch_model(
                    model=Phone,
                    representative=representative, address=address_model,
                    kind='office phone',
                    number=addresses[city].get('Phone', ''))

        # Emails
        mails = mep_json.get('Mail', None)
        if mails:
            # A single address is wrapped in a list, converting it with
            # list() would split the string into characters.
            if not isinstance(mails, (list, tuple)):
                mails = [mails]

            for mail in mails:
                if '@europarl.europa.eu' in mail:
                    kind = 'official'
                else:
                    kind = 'other'
                self.touch_model(
                    model=Email,
                    representative=representative,
                    kind=kind,
                    email=mail)

        # EP page
        changed = False
        try:
            site = WebSite.objects.get(kind='EP',
                                       representative=representative)
        except WebSite.DoesNotExist:
            site = WebSite(kind='EP', representative=representative)
            changed = True

        uid = mep_json['UserID']
        url = 'http://www.europarl.europa.eu/meps/en/%s/_home.html' % uid
        if site.url != url:
            site.url = url
            changed = True

        if changed:
            site.save()

        # WebSite
        for url in mep_json.get('Homepage', []):
            self.touch_model(model=WebSite,
                             url=url,
                             representative=representative)

        # Only the first Twitter and Facebook entries are imported.
        twitter = mep_json.get('Twitter', None)
        if twitter:
            self.touch_model(model=WebSite,
                             representative=representative,
                             kind='twitter',
                             url=twitter[0])

        facebook = mep_json.get('Facebook', None)
        if facebook:
            self.touch_model(model=WebSite,
                             representative=representative,
                             kind='facebook',
                             url=facebook[0])


def main(stream=None):
    '''
    Import every mep found in a JSON array of parltrack dicts

    The JSON is read from stream, or from the standard input when no
    stream is given. A mep that fails to import is logged with its
    traceback and the import goes on with the next one.
    '''
    if not apps.ready:
        django.setup()

    importer = ParltrackImporter()
    importer.pre_import()

    # ijson yields the items of the top level array one at a time.
    for data in ijson.items(stream or sys.stdin, 'item'):
        try:
            importer.manage_mep(data)
        except Exception:
            logger.exception('error trying to import rep %s', str(data))

    # Commenting for now, it's a bit dangerous, if a json file was corrupt it
    # would drop valid data !
    # importer.post_import()