# import_votes.py
# coding: utf-8
import logging
3
import re
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import sys
from os.path import join

import django.dispatch
import ijson
import django
from django.apps import apps
from dateutil.parser import parse as date_parse
from django.db import transaction
from django.utils.timezone import make_aware as date_make_aware
from pytz import timezone as date_timezone

from representatives.models import Representative
from representatives_votes.models import Dossier, Proposal, Vote


logger = logging.getLogger(__name__)

# Sent by Command.should_skip() with the raw record as ``vote_data`` before the
# individual votes of a proposal are imported; a receiver that returns False
# makes the importer skip those votes.
# ``providing_args`` is intentionally not passed: it was documentation-only,
# deprecated in Django 3.1 and rejected by Django 4.0 and later.
vote_pre_import = django.dispatch.Signal()


def _parse_date(date_str):
    """Parse ``date_str`` and make the result aware in Europe/Brussels time."""
    parsed = date_parse(date_str)
    brussels = date_timezone('Europe/Brussels')
    return date_make_aware(parsed, brussels)

# Remote votes dump and a local path for it; neither is used by the code
# visible in this module (presumably consumed by a download step elsewhere).
JSON_URL = 'http://parltrack.euwiki.org/dumps/ep_votes.json.xz'
DESTINATION = join('/tmp', 'ep_votes.json')

# Captures the value of the ``reference`` query-string parameter of a URL.
RE_COMVOTE_REF = re.compile(r'&reference=([^&]+)')

# Default mapping from position name to the key holding that position in a
# record: by default the record uses the position names themselves.
POSITION_MAP = {
    'For': 'For',
    'Against': 'Against',
    'Abstain': 'Abstain',
}


class Command(object):
    """Import Parltrack vote records into Proposal and Vote models.

    Call init_cache() once, then parse_vote_data() for every record of the
    dump. Dossiers and representatives are resolved through in-memory
    caches built by init_cache(); they are never created here.
    """

    def init_cache(self):
        """Build the lookup tables used while importing."""
        self.cache = {}
        self.index_representatives()
        self.index_dossiers()

    def should_skip(self, proposal_data):
        """Tell whether a ``vote_pre_import`` receiver vetoed this record.

        The signal is sent with ``vote_data=proposal_data``. Only a response
        that is exactly ``False`` counts as a veto; ``None`` or other falsy
        responses do not.
        """
        responses = vote_pre_import.send(sender=self, vote_data=proposal_data)
        return any(response is False for _receiver, response in responses)

    def parse_vote_data(self, vote_data):
        """Parse one record (one proposal) from a Parltrack votes dump.

        The dossier reference is taken from ``ep_ref`` (which is copied into
        ``vote_data['epref']``, so the input dict is mutated) or from
        ``epref``. Records without a reference, or whose dossier is not in
        the cache, are logged at debug level and ignored.

        Returns the result of parse_committee_vote_data() when the record has
        a ``committee`` key, of parse_proposal_data() otherwise, or None when
        the record is ignored.
        """
        if 'ep_ref' in vote_data:
            vote_data['epref'] = vote_data['ep_ref']
        elif 'epref' not in vote_data:
            # Log the first identifying field available.
            label = vote_data.get(
                'title', vote_data.get('doc', vote_data.get('url', '?')))
            logger.debug('Could not import data without epref %s', label)
            return None

        dossier_pk = self.get_dossier(vote_data['epref'])

        if not dossier_pk:
            logger.debug('Cannot find dossier with remote id %s',
                         vote_data['epref'])
            return None

        if 'committee' in vote_data:
            return self.parse_committee_vote_data(
                proposal_data=vote_data,
                dossier_pk=dossier_pk
            )

        return self.parse_proposal_data(
            proposal_data=vote_data,
            dossier_pk=dossier_pk
        )

    def parse_proposal_totals(self, data, position_map=POSITION_MAP):
        """Extract the per-position totals of a record.

        ``position_map`` maps 'For', 'Abstain' and 'Against' to the key that
        holds that position in ``data``. Returns a dict with the keys
        ``total_for``, ``total_abstain`` and ``total_against``; a missing
        position or a missing ``total`` yields 0. Totals given as strings of
        digits are converted to int, other values are passed through as is.
        """
        # ``str`` alone would miss ``unicode`` values on Python 2.
        text_types = (str, type(u''))
        totals = {}

        for position in ('For', 'Abstain', 'Against'):
            position_data = data.get(position_map[position], {})
            position_total = position_data.get('total', 0)

            if (isinstance(position_total, text_types)
                    and position_total.isdigit()):
                position_total = int(position_total)

            totals['total_%s' % position.lower()] = position_total

        return totals

    def parse_proposal_votes(self, proposal, data, position_map=POSITION_MAP):
        """Create or update one Vote per representative listed in ``data``.

        For each position, the ``votes`` of every entry under ``groups`` are
        walked. Entries that are not dicts, and entries whose representative
        cannot be resolved, are logged as errors and skipped. A Vote is
        saved only when it is new or when its position changed; the stored
        position is the lower-cased position name.
        """
        logger.info(u'Looking for votes in proposal %s', proposal.title)

        for position in ('For', 'Abstain', 'Against'):
            position_data = data.get(position_map[position], {})

            for group_vote_data in position_data.get('groups', {}):
                for vote_data in group_vote_data['votes']:
                    if not isinstance(vote_data, dict):
                        logger.error('Skipping vote data %s for proposal %s',
                                     vote_data, data.get('_id'))
                        continue

                    representative_pk = self.get_representative(vote_data)

                    if representative_pk is None:
                        logger.error('Could not find mep for %s', vote_data)
                        continue

                    changed = False
                    try:
                        vote = Vote.objects.get(
                            representative_id=representative_pk,
                            proposal_id=proposal.pk)
                    except Vote.DoesNotExist:
                        vote = Vote(proposal_id=proposal.pk,
                                    representative_id=representative_pk)
                        changed = True

                    if vote.position != position.lower():
                        changed = True
                        vote.position = position.lower()

                    if changed:
                        vote.save()
                        logger.debug('Save vote %s for MEP %s on %s #%s to %s',
                                     vote.pk, representative_pk,
                                     proposal.title, proposal.pk, position)

    def _save_if_changed(self, instance, values, changed=False):
        """Copy ``values`` onto ``instance`` and save it if anything differs.

        ``changed`` forces the save (used for instances not stored yet).
        Returns True when the instance was saved.
        """
        for key, value in values.items():
            if value != getattr(instance, key, None):
                setattr(instance, key, value)
                changed = True

        if changed:
            instance.save()

        return changed

    @transaction.atomic
    def parse_committee_vote_data(self, proposal_data, dossier_pk):
        """Get or create the proposal of a committee vote, then its votes.

        The proposal is looked up by dossier and by a title built from the
        ``committee`` and ``doc`` fields. Records whose ``url`` carries no
        ``reference`` parameter are logged at debug level and ignored. The
        whole method runs inside one database transaction. Returns None.
        """
        title = u'%s vote on %s' % (proposal_data['committee'],
                                    proposal_data['doc'])
        changed = False

        try:
            proposal = Proposal.objects.get(title=title, dossier_id=dossier_pk)
        except Proposal.DoesNotExist:
            proposal = Proposal(title=title, dossier_id=dossier_pk)
            changed = True

        try:
            ref = RE_COMVOTE_REF.search(proposal_data['url']).group(1)
        except (KeyError, AttributeError, TypeError):
            # No url, no reference parameter in it, or url is not a string.
            logger.debug(u'Cannot find proposal reference for %s', title)
            return

        # Committee records key the positions by sign rather than by name.
        position_map = {
            'For': '+',
            'Against': '-',
            'Abstain': '0'
        }

        data_map = dict(
            datetime=_parse_date(proposal_data['ts']),
            reference=ref,
            kind='committee-vote'
        )
        data_map.update(self.parse_proposal_totals(proposal_data,
                                                   position_map))
        self._save_if_changed(proposal, data_map, changed)

        if self.should_skip(proposal_data):
            logger.debug(u'Skipping votes for dossier %s',
                         proposal_data.get('epref', title))
            return

        self.parse_proposal_votes(proposal, proposal_data, position_map)

    @transaction.atomic
    def parse_proposal_data(self, proposal_data, dossier_pk):
        """Get or create a proposal model from raw data, then its votes.

        Records without an ``issue_type`` are logged at debug level and
        ignored. The whole method runs inside one database transaction.
        Returns the proposal, or None when the record is ignored or when a
        ``vote_pre_import`` receiver vetoed the import of its votes.

        NOTE(review): the proposal is looked up by title alone, whereas the
        committee variant also filters on the dossier -- confirm that titles
        are unique across dossiers.
        """
        if 'issue_type' not in proposal_data:
            logger.debug('This proposal data without issue_type: %s',
                         proposal_data['epref'])
            return None

        changed = False
        try:
            proposal = Proposal.objects.get(title=proposal_data['title'])
        except Proposal.DoesNotExist:
            proposal = Proposal(title=proposal_data['title'])
            changed = True

        data_map = dict(
            title=proposal_data['title'],
            datetime=_parse_date(proposal_data['ts']),
            dossier_id=dossier_pk,
            reference=proposal_data.get('report'),
            kind=proposal_data.get('issue_type')
        )
        data_map.update(self.parse_proposal_totals(proposal_data))
        self._save_if_changed(proposal, data_map, changed)

        if self.should_skip(proposal_data):
            logger.debug(u'Skipping votes for dossier %s',
                         proposal_data.get('epref', proposal_data['title']))
            return None

        self.parse_proposal_votes(proposal, proposal_data)

        return proposal

    def index_dossiers(self):
        """Cache the primary key of every dossier, keyed by its reference."""
        self.cache['dossiers'] = dict(
            Dossier.objects.values_list('reference', 'pk'))

    def get_dossier(self, reference):
        """Return the cached dossier pk for ``reference``, or None."""
        return self.cache['dossiers'].get(reference, None)

    def index_representatives(self):
        """Cache representative pks keyed by the id found in their EP URL.

        The key is the number embedded in the URL of each website of kind
        'EP'. URLs that do not match the expected pattern are logged and
        skipped instead of aborting the whole indexing.
        """
        epre = re.compile(r'/meps/en/(\d+)/_home.html')
        rows = (
            Representative.objects.prefetch_related('website_set')
            .filter(website__kind='EP')
            .values_list('website__url', 'pk')
        )

        meps = {}
        for url, representative_pk in rows:
            match = epre.search(url or '')
            if match is None:
                logger.warning('Cannot extract EP id from website url %s',
                               url)
                continue
            meps[int(match.group(1))] = representative_pk

        self.cache['meps'] = meps

    def get_representative(self, vote_data):
        """Return the cached representative pk for ``vote_data['ep_id']``.

        Returns None when ``ep_id`` is missing, is not an integer value, or
        is not in the cache.
        """
        ep_id = vote_data.get('ep_id', None)
        if ep_id is None:
            return None

        try:
            return self.cache['meps'].get(int(ep_id), None)
        except (TypeError, ValueError):
            return None


def main(stream=None):
    """Import every vote record of a JSON array read from ``stream``.

    ``stream`` defaults to standard input. Django is set up first if the
    app registry is not ready. Each element of the top-level array is handed
    to Command.parse_vote_data(); a failure on one record is logged with its
    traceback and does not stop the import.
    """
    if not apps.ready:
        django.setup()

    command = Command()
    command.init_cache()

    # ijson expects a binary file object: use the byte buffer behind
    # sys.stdin where there is one (Python 3), sys.stdin itself otherwise.
    source = stream or getattr(sys.stdin, 'buffer', sys.stdin)

    for vote_data in ijson.items(source, 'item'):
        try:
            command.parse_vote_data(vote_data)
        except Exception:
            # Import boundary: keep going with the next record.
            logger.exception('error trying to import vote %s', vote_data)