# import_representatives.py
# coding: utf-8

import logging
import sys
from datetime import datetime

import django
import django.dispatch
import ijson
from django.apps import apps
from django.db import transaction
from django.utils import timezone
from django.utils.text import slugify

from representatives.models import (Address, Chamber, Constituency, Country,
                                    Email, Group, Mandate, Phone,
                                    Representative, WebSite)

logger = logging.getLogger(__name__)

# Sent by ParltrackImporter.manage_mep before a record is imported. Receivers
# get the raw record as ``representative_data``; when any receiver returns
# False the record is skipped.
representative_pre_import = django.dispatch.Signal(
    providing_args=['representative_data'])


def _parse_date(date):
    '''
    Convert a timestamp string such as "2014-07-01T00:00:00" into a
    ``datetime.date``.

    The time of day is parsed and then discarded, so timestamps that are not
    at midnight are accepted as well. Raises ValueError when the string does
    not match ``YYYY-MM-DDTHH:MM:SS``.
    '''
    return datetime.strptime(date, "%Y-%m-%dT%H:%M:%S").date()


class GenericImporter(object):
    '''
    Base importer that records when an import started so that rows which
    were not touched since then can be recognised afterwards.
    '''

    def pre_import(self):
        '''
        Remember the moment the import begins.
        '''
        self.import_start_datetime = timezone.now()

    def post_import(self):
        '''
        Delete every row whose ``updated`` value is older than the start of
        the import, i.e. rows the import did not touch.
        '''
        for stale_model in (Representative, Group, Constituency, Mandate,
                            Address, Phone, Email, WebSite):
            outdated = stale_model.objects.filter(
                updated__lt=self.import_start_datetime)
            outdated.delete()

    def touch_model(self, model, **data):
        '''
        Look up the row of ``model`` matching ``data``, creating it when it
        does not exist.

        An already existing row whose ``updated`` value predates the start
        of the import is saved again. Returns the ``(instance, created)``
        pair.
        '''
        instance, created = model.objects.get_or_create(**data)

        needs_touch = (not created
                       and instance.updated < self.import_start_datetime)
        if needs_touch:
            # Saving again is what refreshes the updated field.
            instance.save()

        return (instance, created)


class ParltrackImporter(GenericImporter):
    '''
    Import MEP records from a Parltrack JSON dump as representatives, with
    their mandates and contact details.
    '''
    url = 'http://parltrack.euwiki.org/dumps/ep_meps_current.json.xz'
    check_etag = True

    # Full name -> last name overrides, used instead of the "family" field
    # of the record. Built once here rather than on every imported record.
    _LAST_NAME_FIXES = {
        "Esther de LANGE": "de LANGE",
        "Patricia van der KAMMEN": "van der KAMMEN",
        "Judith A. MERKIES": "MERKIES",
        "Heinz K. BECKER": "BECKER",
        "Cornelis de JONG": "de JONG",
        "Peter van DALEN": "van DALEN",
        "Sophia in 't VELD": "in 't VELD",
        "Marielle de SARNEZ": "de SARNEZ",
        "Anne E. JENSEN": "JENSEN",
        "Wim van de CAMP": "van de CAMP",
        "Lambert van NISTELROOIJ": "van NISTELROOIJ",
        "Johannes Cornelis van BAALEN": "van BAALEN",
        "Ioannis A. TSOUKALAS": "TSOUKALAS",
        "Pilar del CASTILLO VERA": "del CASTILLO VERA",
        "Luis de GRANDES PASCUAL": "de GRANDES PASCUAL",
        "Philippe de VILLIERS": "de VILLIERS",
        "Daniël van der STOEP": "van der STOEP",
        "William (The Earl of) DARTMOUTH": "(The Earl of) Dartmouth",
        "Bairbre de BRÚN": u'de Br\xfan',
        "Karl von WOGAU": u'von WOGAU',
        "Ieke van den BURG": u'van den BURG',
        "Manuel António dos SANTOS": u'dos SANTOS',
        "Paul van BUITENEN": u'van BUITENEN',
        "Elly de GROEN-KOUWENHOVEN": u'de GROEN-KOUWENHOVEN',
        "Margrietus van den BERG": u'van den BERG',
        u'Dani\xebl van der STOEP': u'van der STOEP',
        "Alexander Graf LAMBSDORFF": u'Graf LAMBSDORFF',
        u'Bairbre de BR\xdaN': u'de BR\xdaN',
        'Luigi de MAGISTRIS': 'de MAGISTRIS',
    }

    # Political group ids found in the dump -> abbreviation stored on Group.
    _GROUP_ABBREVIATIONS = {
        "S&D": "SD",
        "NA": "NI",
        "ID": "IND/DEM",
        "PPE": "EPP",
        "Verts/ALE": "Greens/EFA",
    }

    def __init__(self):
        '''
        Cache the country primary keys and make sure the European Parliament
        chamber, constituency and group rows exist.
        '''
        self.cache = {
            # Country name -> primary key, read once for the whole import.
            'countries': {c.name: c.pk for c in Country.objects.all()},
        }
        self.ep_chamber, _ = Chamber.objects.get_or_create(
            name='European Parliament', abbreviation='EP')
        self.ep_constituency, _ = Constituency.objects.get_or_create(
            name='European Parliament')
        self.ep_group, _ = Group.objects.get_or_create(
            name='European Parliament', kind='chamber', abbreviation='EP',
            chamber=self.ep_chamber)

    def parse_date(self, date):
        '''
        Parse a timestamp string from the dump into a date.
        '''
        return _parse_date(date)

    @staticmethod
    def _full_name(mep_json):
        '''
        Return the "full" name of a record, falling back to
        "<sur> <family>" when the record has no "full" entry.
        '''
        name = mep_json["Name"]
        if 'full' in name:
            return name["full"]
        return name["sur"] + " " + name["family"]

    @transaction.atomic
    def manage_mep(self, mep_json):
        '''
        Import a mep as a representative from the json dict fetched from
        parltrack.

        Returns the representative, or None when a receiver of
        ``representative_pre_import`` answered False for this record.
        Note that a placeholder "Birth" entry is added to ``mep_json`` when
        the record has none.
        '''
        # Some versions of memopol will connect to this and skip inactive meps.
        responses = representative_pre_import.send(
            sender=self, representative_data=mep_json)
        if any(response is False for _, response in responses):
            logger.debug('Skipping MEP %s', self._full_name(mep_json))
            return

        # Issue 185. We must have a Birth date for our mep, to allow import
        # and slugifying stuff.
        if "Birth" not in mep_json:
            mep_json["Birth"] = {"date": "9999-01-01T00:00:00", "place": ""}

        slug = slugify('%s-%s' % (
            self._full_name(mep_json),
            _parse_date(mep_json["Birth"]["date"])))

        changed = False
        try:
            representative = Representative.objects.get(slug=slug)
        except Representative.DoesNotExist:
            # Not saved yet: flag it so the details step saves it.
            representative = Representative(slug=slug)
            changed = True

        self.import_representative_details(representative, mep_json, changed)
        self.add_mandates(representative, mep_json)
        self.add_contacts(representative, mep_json)

        # The representative is only rendered if the debug record is emitted.
        logger.debug(u'Imported MEP %s', representative)

        return representative

    def import_representative_details(self, representative, mep_json, changed):
        '''
        Copy the personal attributes of ``mep_json`` onto ``representative``
        and save it when ``changed`` is true or any attribute differed.
        '''
        def assign(field, value):
            # Set the attribute only when it differs; report whether it did.
            if getattr(representative, field) != value:
                setattr(representative, field, value)
                return True
            return False

        name = mep_json["Name"]

        changed = assign('active', mep_json['active']) or changed

        birth = mep_json.get("Birth")
        if birth:
            birth_date = _parse_date(birth["date"])
            changed = assign('birth_date', birth_date) or changed
            if "place" in birth:
                changed = assign('birth_place', birth["place"]) or changed

        changed = assign('first_name', name["sur"]) or changed
        # Same fallback as the slug, so a record without "full" still imports.
        changed = assign('full_name', self._full_name(mep_json)) or changed
        changed = assign('photo', mep_json["Photo"]) or changed

        last_name = name["family"]
        override = self._LAST_NAME_FIXES.get(representative.full_name)
        if override:
            last_name = override
        elif last_name == "J.A.J. STASSEN":
            last_name = "STASSEN"
        changed = assign('last_name', last_name) or changed

        # Anything that is not F or M, including a missing entry, becomes 0.
        gender = {u"F": 1, u"M": 2}.get(mep_json.get('Gender'), 0)
        changed = assign('gender', gender) or changed

        changed = assign('cv', "\n".join(mep_json.get("CV", []))) or changed

        if changed:
            representative.save()

    def _create_mandate(self, mandate_data, representative, group,
                        constituency):
        '''
        Get or create the mandate described by ``mandate_data``.

        A missing or empty "start" / "end" gives a None begin / end date and
        a missing "role" gives an empty role.
        '''
        start = mandate_data.get("start")
        end = mandate_data.get("end")

        mandate, created = Mandate.objects.get_or_create(
            representative=representative,
            group=group,
            constituency=constituency,
            role=mandate_data.get('role', ''),
            begin_date=_parse_date(start) if start else None,
            end_date=_parse_date(end) if end else None
        )

        if created:
            logger.debug('Created mandate %s with %s', mandate.pk,
                         mandate_data)

    def add_mandates(self, representative, mep_json):
        '''
        Create the mandates of ``representative`` for the committees,
        delegations, political groups, countries and staff organisations
        listed in ``mep_json``.
        '''
        # Committee
        for mandate_data in mep_json.get('Committees', []):
            if not mandate_data.get("committee_id"):
                continue
            group, _ = self.touch_model(
                model=Group,
                abbreviation=mandate_data['committee_id'],
                kind='committee',
                name=mandate_data['Organization'],
                chamber=self.ep_chamber)
            self._create_mandate(mandate_data, representative, group,
                                 self.ep_constituency)

        # Delegations
        for mandate_data in mep_json.get('Delegations', []):
            group, _ = self.touch_model(
                model=Group,
                kind='delegation',
                name=mandate_data['Organization'],
                chamber=self.ep_chamber)
            self._create_mandate(mandate_data, representative, group,
                                 self.ep_constituency)

        # Group
        for mandate_data in mep_json.get('Groups', []):
            group_id = mandate_data.get('groupid')
            if not group_id:
                continue
            if isinstance(group_id, list):
                group_id = group_id[0]

            group, _ = self.touch_model(
                model=Group,
                abbreviation=self._GROUP_ABBREVIATIONS.get(group_id, group_id),
                kind='group',
                name=mandate_data['Organization'],
                chamber=self.ep_chamber)
            self._create_mandate(mandate_data, representative, group,
                                 self.ep_constituency)

        # Countries
        for mandate_data in mep_json.get('Constituencies', []):
            if not mandate_data:
                continue

            country_name = mandate_data['country']
            country = Country.objects.get(name=country_name)
            group, _ = self.touch_model(
                model=Group,
                abbreviation=country.code,
                kind='country',
                name=country.name)

            party = mandate_data['party']
            local_party = party if party and party != '-' else 'unknown'

            # The constituency country comes from the cache built in __init__.
            country_id = self.cache['countries'].get(country_name)

            save_constituency = False
            try:
                constituency = Constituency.objects.get(name=local_party)
            except Constituency.DoesNotExist:
                constituency = Constituency(name=local_party)
                save_constituency = True

            if constituency.country_id != country_id:
                constituency.country_id = country_id
                save_constituency = True

            if save_constituency:
                constituency.save()

            # One mandate in the country group for the local party, and one
            # in the European Parliament group itself.
            self._create_mandate(mandate_data, representative, group,
                                 constituency)
            self._create_mandate(mandate_data, representative, self.ep_group,
                                 self.ep_constituency)

        # Organisations
        for mandate_data in mep_json.get('Staff', []):
            group, _ = self.touch_model(
                model=Group,
                abbreviation='',
                kind='organization',
                name=mandate_data['Organization'])
            self._create_mandate(mandate_data, representative, group,
                                 self.ep_constituency)

    def add_contacts(self, representative, mep_json):
        '''
        Create the addresses, phones, emails and websites of
        ``representative`` from ``mep_json``.
        '''
        # Addresses
        addresses = mep_json.get('Addresses')
        if addresses:
            # Fixed postal details of the two parliament seats; any other
            # entry of "Addresses" is ignored.
            offices = {
                'Brussels': {
                    'country': Country.objects.get(name="Belgium"),
                    'street': u"rue Wiertz / Wiertzstraat",
                    'number': '60',
                    'postcode': '1047',
                    'name': "Brussels European Parliament",
                },
                'Strasbourg': {
                    'country': Country.objects.get(name="France"),
                    'street': u"Av. du Président Robert Schuman - CS 91024",
                    'number': '1',
                    'postcode': '67070',
                    'name': "Strasbourg European Parliament",
                },
            }

            for city in addresses:
                office = offices.get(city)
                if office is None:
                    continue

                # First three characters are stored as the floor, the rest
                # as the office number.
                office_code = addresses[city]['Address']['Office']
                address_model, _ = self.touch_model(
                    model=Address,
                    representative=representative,
                    city=city,
                    floor=office_code[:3],
                    office_number=office_code[3:],
                    kind='official',
                    **office)

                self.touch_model(
                    model=Phone,
                    representative=representative,
                    address=address_model,
                    kind='office phone',
                    number=addresses[city].get('Phone', ''))

        # Emails
        mails = mep_json.get('Mail')
        if mails:
            if not isinstance(mails, list):
                # A single address: wrap it, list() would split it into
                # one entry per character.
                mails = [mails]

            for mail in mails:
                official = '@europarl.europa.eu' in mail
                self.touch_model(
                    model=Email,
                    representative=representative,
                    kind='official' if official else 'other',
                    email=mail)

        # EP page
        changed = False
        try:
            site = WebSite.objects.get(kind='EP',
                                       representative=representative)
        except WebSite.DoesNotExist:
            site = WebSite(kind='EP', representative=representative)
            changed = True

        url = ('http://www.europarl.europa.eu/meps/en/%s/_home.html'
               % mep_json['UserID'])
        if site.url != url:
            site.url = url
            changed = True

        if changed:
            site.save()

        # WebSite
        for homepage in mep_json.get('Homepage', []):
            self.touch_model(model=WebSite,
                             url=homepage,
                             representative=representative)

        # Only the first Twitter / Facebook entry is kept.
        for key, kind in (('Twitter', 'twitter'), ('Facebook', 'facebook')):
            links = mep_json.get(key)
            if links:
                self.touch_model(model=WebSite,
                                 representative=representative,
                                 kind=kind,
                                 url=links[0])


def main(stream=None):
    '''
    Import every record of a JSON array read from ``stream`` (standard
    input when no stream is given).

    Records are parsed incrementally with ijson. A record that fails to
    import is logged with its traceback and the import goes on with the
    next one.
    '''
    if not apps.ready:
        django.setup()

    importer = ParltrackImporter()
    importer.pre_import()

    for data in ijson.items(stream or sys.stdin, 'item'):
        try:
            importer.manage_mep(data)
        except Exception:
            # One broken record must not abort the whole import.
            logger.exception('error trying to import rep %s', str(data))

    # importer.post_import() is deliberately not called for now: it's a bit
    # dangerous, if a json file was corrupt it would drop valid data !