Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Political Memory
memopol
Commits
5bdee8f1
Commit
5bdee8f1
authored
Jul 07, 2016
by
Nicolas Joyard
Browse files
Move dossier links to new Document model
parent
2c7a1dce
Changes
7
Hide whitespace changes
Inline
Side-by-side
representatives_votes/admin.py
View file @
5bdee8f1
...
...
@@ -2,14 +2,22 @@
from
django.contrib
import
admin
from
.models
import
Dossier
,
Proposal
,
Vote
from
.models
import
Dossier
,
Document
,
Proposal
,
Vote
class
DossierAdmin
(
admin
.
ModelAdmin
):
list_display
=
(
'id'
,
'reference'
,
'title'
,
'link'
)
list_display
=
(
'id'
,
'reference'
,
'title'
)
search_fields
=
(
'reference'
,
'title'
)
class DocumentAdmin(admin.ModelAdmin):
    """Admin configuration for the documents attached to a dossier."""

    list_display = ('dossier_reference', 'kind', 'title', 'link')
    # Only fields that exist on Document, or that are reachable through its
    # ``dossier`` relation, may be searched. Document has no ``reference``
    # field of its own, so listing it here made every admin search fail.
    search_fields = ('dossier__reference', 'title')
    # ``dossier_reference`` dereferences the related dossier for every row of
    # the changelist; load it together with the documents.
    list_select_related = ('dossier',)

    def dossier_reference(self, obj):
        """Return the reference of the dossier that ``obj`` belongs to."""
        return obj.dossier.reference
class
ProposalAdmin
(
admin
.
ModelAdmin
):
list_display
=
(
'reference'
,
...
...
@@ -49,5 +57,6 @@ class VoteAdmin(admin.ModelAdmin):
return
obj
.
proposal
.
reference
admin
.
site
.
register
(
Dossier
,
DossierAdmin
)
admin
.
site
.
register
(
Document
,
DocumentAdmin
)
admin
.
site
.
register
(
Proposal
,
ProposalAdmin
)
admin
.
site
.
register
(
Vote
,
VoteAdmin
)
representatives_votes/contrib/francedata/import_dossiers.py
View file @
5bdee8f1
...
...
@@ -3,27 +3,48 @@
import
sys
import
ijson
import
logging
import
re
import
django
from
django.apps
import
apps
from
django.db
import
transaction
from
representatives_votes.models
import
Dossier
from
representatives.contrib.francedata.import_representatives
import
\
ensure_chambers
from
representatives.models
import
Chamber
from
representatives_votes.models
import
Document
,
Dossier
logger
=
logging
.
getLogger
(
__name__
)
def extract_reference(url):
    """Extract a dossier reference from a dossier URL.

    Three URL shapes are recognised, tried in this order:

    * ``.../dossier-legislatif/<name>.html``  -> ``<name>``
    * ``.../<digits>/dossiers/<name>.asp``    -> ``<digits>/<name>``
    * ``.../dossiers/<name>.asp``             -> ``<name>``

    :param url: URL to inspect; may be ``None`` or empty.
    :return: the reference string, or ``None`` when ``url`` is empty or
        matches none of the known shapes.
    """
    # A missing or empty URL cannot carry a reference; answer None instead of
    # letting re.search() raise TypeError on a non-string.
    if not url:
        return None

    m = re.search(r'/dossier-legislatif/([^./]+)\.html', url)
    if m:
        return m.group(1)

    # Must be tried before the bare "/dossiers/" pattern below, which would
    # also match these URLs but drop the numeric path component.
    m = re.search(r'/(\d+)/dossiers/([^./]+)\.asp', url)
    if m:
        return '%s/%s' % (m.group(1), m.group(2))

    m = re.search(r'/dossiers/([^./]+)\.asp', url)
    if m:
        return m.group(1)

    return None
def
find_dossier
(
data
):
'''
Find dossier with reference matching either '
url
_an' or '
url
_sen',
create it if not found. Ensure its reference
and source are 'url_an' if
both fields
are present.
Find dossier with reference matching either '
ref
_an' or '
ref
_sen',
create it if not found. Ensure its reference
is 'ref_an' if both fields
are present.
'''
changed
=
False
dossier
=
None
reffield
=
None
for
field
in
[
k
for
k
in
(
'
url
_an'
,
'
url
_sen'
)
if
k
in
data
]:
for
field
in
[
k
for
k
in
(
'
ref
_an'
,
'
ref
_sen'
)
if
k
in
data
]:
try
:
dossier
=
Dossier
.
objects
.
get
(
reference
=
data
[
field
])
reffield
=
field
...
...
@@ -32,50 +53,85 @@ def find_dossier(data):
pass
if
dossier
is
None
:
reffield
=
'
url
_an'
if
'
url
_an'
in
data
else
'
url
_sen'
reffield
=
'
ref
_an'
if
'
ref
_an'
in
data
else
'
ref
_sen'
dossier
=
Dossier
(
reference
=
data
[
reffield
])
logger
.
debug
(
'Created dossier %s'
%
data
[
reffield
])
changed
=
True
if
'
url
_an'
in
data
and
reffield
!=
'
url
_an'
:
logger
.
debug
(
'Changed dossier reference to %s'
%
data
[
'
url
_an'
])
dossier
.
reference
=
data
[
'
url
_an'
]
if
'
ref
_an'
in
data
and
reffield
!=
'
ref
_an'
:
logger
.
debug
(
'Changed dossier reference to %s'
%
data
[
'
ref
_an'
])
dossier
.
reference
=
data
[
'
ref
_an'
]
changed
=
True
return
dossier
,
changed
def
parse_dossier_data
(
data
):
dossier
,
changed
=
find_dossier
(
data
)
def handle_document(dossier, chamber, url):
    """Make sure ``dossier`` has a 'procedure-file' document for ``chamber``
    whose link is ``url``.

    The document is looked up by (chamber, dossier, kind). It is instantiated
    when missing and its link is updated when it differs from ``url``. The
    document is saved only if it was just instantiated or its link changed.
    """
    lookup = dict(chamber=chamber, dossier=dossier, kind='procedure-file')
    needs_save = False

    try:
        document = Document.objects.get(**lookup)
    except Document.DoesNotExist:
        document = Document(**lookup)
        logger.debug('Created %s document for dossier %s' %
                     (chamber.abbreviation, dossier.title))
        needs_save = True

    if document.link != url:
        logger.debug('Changing %s url from %s to %s' %
                     (chamber.abbreviation, document.link, url))
        document.link = url
        needs_save = True

    if needs_save:
        document.save()
def
parse_dossier_data
(
data
,
an
,
sen
):
if
'url_an'
in
data
:
ref_an
=
extract_reference
(
data
[
'url_an'
])
if
ref_an
is
None
:
logger
.
warn
(
'No reference for dossier %s'
%
data
[
'url_an'
])
return
else
:
data
[
'ref_an'
]
=
ref_an
if
'url_sen'
in
data
:
ref_sen
=
extract_reference
(
data
[
'url_sen'
])
if
ref_sen
is
None
:
logger
.
warn
(
'No reference for dossier %s'
%
data
[
'url_sen'
])
return
else
:
data
[
'ref_sen'
]
=
ref_sen
thisurl
=
data
[
'url_an'
if
data
[
'chambre'
]
==
'AN'
else
'url_sen'
]
dossier
,
changed
=
find_dossier
(
data
)
if
dossier
.
reference
!=
dossier
.
link
:
logger
.
debug
(
'Changed dossier link to %s'
%
dossier
.
reference
)
dossier
.
link
=
dossier
.
reference
changed
=
True
thisref
=
data
[
'ref_an'
if
data
[
'chambre'
]
==
'AN'
else
'ref_sen'
]
title
=
data
[
'titre'
]
if
dossier
.
reference
==
this
url
and
dossier
.
title
!=
title
:
if
dossier
.
reference
==
this
ref
and
dossier
.
title
!=
title
:
logger
.
debug
(
'Changed dossier title to %s'
%
title
)
dossier
.
title
=
title
changed
=
True
if
'url_an'
in
data
and
'url_sen'
in
data
:
ext_link
=
data
[
'url_sen'
]
if
dossier
.
ext_link
!=
ext_link
:
logger
.
debug
(
'Changed dossier ext. link to %s'
%
ext_link
)
dossier
.
ext_link
=
ext_link
changed
=
True
with
transaction
.
atomic
():
if
changed
:
logger
.
debug
(
'Saved dossier %s'
%
dossier
.
reference
)
dossier
.
save
()
if
'url_an'
in
data
:
handle_document
(
dossier
,
an
,
data
[
'url_an'
])
if
changed
:
logger
.
debug
(
'Saved dossier %s'
%
dossier
.
reference
)
dossier
.
save
()
if
'url_sen'
in
data
:
handle_document
(
dossier
,
sen
,
data
[
'url_sen'
])
def
main
(
stream
=
None
):
if
not
apps
.
ready
:
django
.
setup
()
ensure_chambers
()
an
=
Chamber
.
objects
.
get
(
abbreviation
=
'AN'
)
sen
=
Chamber
.
objects
.
get
(
abbreviation
=
'SEN'
)
for
data
in
ijson
.
items
(
stream
or
sys
.
stdin
,
'item'
):
parse_dossier_data
(
data
)
parse_dossier_data
(
data
,
an
,
sen
)
representatives_votes/contrib/francedata/import_scrutins.py
View file @
5bdee8f1
...
...
@@ -48,23 +48,16 @@ def _get_unique_title(proposal_pk, candidate):
class
ScrutinImporter
:
dossiers_ref
=
None
dossiers_ext
=
None
dossiers
=
{}
def
get_dossier
(
self
,
url
):
if
self
.
dossiers_ref
is
None
:
self
.
dossiers_ref
=
{
d
[
0
]:
d
[
1
]
for
d
in
Dossier
.
objects
.
values_list
(
'reference'
,
'pk'
)
}
if
self
.
dossiers_ext
is
None
:
self
.
dossiers_ext
=
{
d
[
0
]:
d
[
1
]
for
d
in
Dossier
.
objects
.
exclude
(
ext_link
=
''
)
.
values_list
(
'ext_link'
,
'pk'
)
}
if
url
not
in
self
.
dossiers
:
try
:
self
.
dossiers
[
url
]
=
Dossier
.
objects
.
get
(
documents__link
=
url
)
except
Dossier
.
DoesNotExist
:
return
None
return
self
.
dossiers
_ref
.
get
(
url
,
self
.
dossiers_ext
.
get
(
url
,
None
))
return
self
.
dossiers
[
url
]
def
parse_scrutin_data
(
self
,
data
):
ref
=
data
[
'url'
]
...
...
@@ -91,7 +84,7 @@ class ScrutinImporter:
values
=
dict
(
title
=
_get_unique_title
(
proposal
.
pk
,
data
[
"objet"
]),
datetime
=
_parse_date
(
data
[
"date"
]),
dossier_id
=
dossier
,
dossier_id
=
dossier
.
pk
,
kind
=
'dossier'
)
...
...
representatives_votes/contrib/parltrack/import_dossiers.py
View file @
5bdee8f1
...
...
@@ -6,8 +6,10 @@ import urllib2
import
ijson
import
django
from
django.apps
import
apps
from
django.db
import
transaction
from
representatives_votes.models
import
Dossier
from
representatives.models
import
Chamber
from
representatives_votes.models
import
Dossier
,
Document
from
.import_votes
import
Command
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -17,38 +19,51 @@ URL = 'http://parltrack.euwiki.org/dumps/ep_dossiers.json.xz'
LOCAL_PATH
=
'ep_dossiers.json.xz'
def
parse_dossier_data
(
data
):
def
parse_dossier_data
(
data
,
ep
):
"""Parse data from parltarck dossier export (1 dossier) Update dossier
if it existed before, this function goal is to import and update a
dossier, not to import all parltrack data
"""
changed
=
False
doc_changed
=
False
ref
=
data
[
'procedure'
][
'reference'
]
logger
.
debug
(
'Processing dossier %s'
,
ref
)
try
:
dossier
=
Dossier
.
objects
.
get
(
reference
=
ref
)
except
Dossier
.
DoesNotExist
:
dossier
=
Dossier
(
reference
=
ref
)
logger
.
debug
(
'Dossier did not exist'
)
changed
=
True
if
dossier
.
title
!=
data
[
'procedure'
][
'title'
]:
logger
.
debug
(
'Title changed from "%s" to "%s"'
,
dossier
.
title
,
data
[
'procedure'
][
'title'
])
dossier
.
title
=
data
[
'procedure'
][
'title'
]
changed
=
True
source
=
data
[
'meta'
][
'source'
].
replace
(
'&l=en'
,
''
)
if
dossier
.
link
!=
source
:
logger
.
debug
(
'Source changed from "%s" to "%s"'
,
dossier
.
link
,
source
)
dossier
.
link
=
source
changed
=
True
if
changed
:
logger
.
info
(
'Updated dossier %s'
,
ref
)
dossier
.
save
()
with
transaction
.
atomic
():
try
:
dossier
=
Dossier
.
objects
.
get
(
reference
=
ref
)
except
Dossier
.
DoesNotExist
:
dossier
=
Dossier
(
reference
=
ref
)
logger
.
debug
(
'Dossier did not exist'
)
changed
=
True
if
dossier
.
title
!=
data
[
'procedure'
][
'title'
]:
logger
.
debug
(
'Title changed from "%s" to "%s"'
,
dossier
.
title
,
data
[
'procedure'
][
'title'
])
dossier
.
title
=
data
[
'procedure'
][
'title'
]
changed
=
True
if
changed
:
logger
.
info
(
'Updated dossier %s'
,
ref
)
dossier
.
save
()
source
=
data
[
'meta'
][
'source'
].
replace
(
'&l=en'
,
''
)
try
:
doc
=
Document
.
objects
.
get
(
dossier
=
dossier
,
kind
=
'procedure-file'
)
except
Document
.
DoesNotExist
:
doc
=
Document
(
dossier
=
dossier
,
kind
=
'procedure-file'
,
chamber
=
ep
)
logger
.
debug
(
'Document for dossier %s did not exist'
,
ref
)
doc_changed
=
True
if
doc
.
link
!=
source
:
logger
.
debug
(
'Link changed from %s to %s'
,
doc
.
link
,
source
)
doc
.
link
=
source
doc_changed
=
True
if
doc_changed
:
logger
.
info
(
'Updated document %s for dossier %s'
,
doc
.
link
,
ref
)
doc
.
save
()
if
'votes'
in
data
.
keys
()
and
'epref'
in
data
[
'votes'
]:
command
=
Command
()
...
...
@@ -68,13 +83,15 @@ def import_single(stream):
if
not
apps
.
ready
:
django
.
setup
()
ep
=
Chamber
.
objects
.
get
(
abbreviation
=
'EP'
)
for
data
in
ijson
.
items
(
stream
,
''
):
parse_dossier_data
(
data
)
parse_dossier_data
(
data
,
ep
)
def
main
(
stream
=
None
):
if
not
apps
.
ready
:
django
.
setup
()
ep
=
Chamber
.
objects
.
get
(
abbreviation
=
'EP'
)
for
data
in
ijson
.
items
(
stream
or
sys
.
stdin
,
'item'
):
parse_dossier_data
(
data
)
parse_dossier_data
(
data
,
ep
)
representatives_votes/migrations/0012_document.py
0 → 100644
View file @
5bdee8f1
# -*- coding: utf-8 -*-
from
__future__
import
unicode_literals
import
logging
from
django.db
import
migrations
,
models
def migrate_dossier_links(apps, schema_editor):
    """Copy ``Dossier.link`` and ``Dossier.ext_link`` into Document rows.

    For each known chamber, every dossier whose URL contains that chamber's
    domain gets a 'procedure-file' Document pointing at the URL. European
    Parliament dossiers are matched on ``link`` only; French dossiers
    (AN and SEN) are matched on both ``link`` and ``ext_link``.

    :param apps: historical app registry supplied by ``RunPython``.
    :param schema_editor: schema editor supplied by ``RunPython`` (unused).
    :raises Chamber.DoesNotExist: when the 'EP' chamber is missing. Missing
        'AN' or 'SEN' chambers only cause the French dossiers to be skipped.
    """
    logger = logging.getLogger('migrate_dossier_links')

    # Get model managers
    Chamber = apps.get_model("representatives", "Chamber")
    Dossier = apps.get_model("representatives_votes", "Dossier")
    Document = apps.get_model("representatives_votes", "Document")

    docs = []

    def collect(chamber, domain, field):
        """Queue a document for each dossier whose ``field`` URL contains
        ``domain``."""
        lookup = {'%s__icontains' % field: domain}
        for dossier in Dossier.objects.filter(**lookup):
            link = getattr(dossier, field)
            # Log the link actually stored on the document, which is
            # ``ext_link`` rather than ``link`` for ext_link matches.
            logger.info('Create document %s for dossier %s' %
                        (link, dossier.reference))
            docs.append(Document(chamber=chamber, dossier=dossier,
                                 link=link, kind='procedure-file'))

    # EP dossiers
    ep_chamber = Chamber.objects.get(abbreviation='EP')
    collect(ep_chamber, 'europarl.europa.eu', 'link')

    # France dossiers
    try:
        an_chamber = Chamber.objects.get(abbreviation='AN')
        sen_chamber = Chamber.objects.get(abbreviation='SEN')
    except Chamber.DoesNotExist:
        # No French chambers: skip French dossiers, but do NOT return here,
        # otherwise the EP documents collected above would never be saved.
        french_sources = ()
    else:
        french_sources = (
            (an_chamber, 'assemblee-nationale.fr'),
            (sen_chamber, 'senat.fr'),
        )

    for chamber, domain in french_sources:
        # Each chamber is matched against its own domain for both fields
        # (the senate ext_link pass previously filtered on the AN domain).
        collect(chamber, domain, 'link')
        collect(chamber, domain, 'ext_link')

    # Create all documents
    logger.info('Saving %s documents...' % len(docs))
    Document.objects.bulk_create(docs)
class Migration(migrations.Migration):
    """Introduce the Document model and move dossier links into it.

    Steps, in order: create the Document table, copy the existing dossier
    links into documents, then drop the two link columns from Dossier.
    """

    dependencies = [
        ('representatives', '0019_remove_fingerprints'),
        ('representatives_votes', '0011_remove_fingerprints'),
    ]

    operations = [
        # 1. New table holding the documents of a dossier.
        migrations.CreateModel(
            name='Document',
            fields=[
                ('id', models.AutoField(
                    verbose_name='ID',
                    serialize=False,
                    auto_created=True,
                    primary_key=True,
                )),
                ('created', models.DateTimeField(auto_now_add=True)),
                ('updated', models.DateTimeField(auto_now=True)),
                ('title', models.CharField(max_length=1000)),
                ('kind', models.CharField(
                    default=b'',
                    max_length=255,
                    blank=True,
                )),
                ('link', models.URLField(max_length=1000)),
                ('chamber', models.ForeignKey(
                    to='representatives.Chamber',
                )),
                ('dossier', models.ForeignKey(
                    related_name='documents',
                    to='representatives_votes.Dossier',
                )),
            ],
            options={
                'abstract': False,
            },
        ),

        # 2. Data step: must run while Dossier.link / ext_link still exist.
        migrations.RunPython(migrate_dossier_links),

        # 3. The links now live on Document; drop them from Dossier.
        migrations.RemoveField(
            model_name='dossier',
            name='link',
        ),
        migrations.RemoveField(
            model_name='dossier',
            name='ext_link',
        ),
    ]
representatives_votes/models.py
View file @
5bdee8f1
# coding: utf-8
from
django.db
import
models
from
representatives.models
import
Representative
,
TimeStampedModel
from
representatives.models
import
Chamber
,
Representative
,
TimeStampedModel
class
Dossier
(
TimeStampedModel
):
title
=
models
.
CharField
(
max_length
=
1000
)
reference
=
models
.
CharField
(
max_length
=
200
,
unique
=
True
)
text
=
models
.
TextField
(
blank
=
True
,
default
=
''
)
link
=
models
.
URLField
()
ext_link
=
models
.
URLField
(
blank
=
True
,
default
=
''
)
class
Meta
:
unique_together
=
((
'title'
,
'reference'
))
...
...
@@ -18,6 +16,14 @@ class Dossier(TimeStampedModel):
return
unicode
(
self
.
title
)
class Document(TimeStampedModel):
    """A linked document belonging to a dossier, attributed to a chamber."""

    # Owning dossier; reachable from the dossier side as ``dossier.documents``.
    dossier = models.ForeignKey(
        Dossier,
        related_name='documents',
    )
    # Chamber the document is attributed to.
    chamber = models.ForeignKey(Chamber)

    title = models.CharField(max_length=1000)
    # Free-form document type; optional, defaults to the empty string.
    kind = models.CharField(
        max_length=255,
        blank=True,
        default='',
    )
    # URL of the document.
    link = models.URLField(max_length=1000)
class
Proposal
(
TimeStampedModel
):
dossier
=
models
.
ForeignKey
(
Dossier
,
related_name
=
'proposals'
)
title
=
models
.
CharField
(
max_length
=
1000
,
unique
=
True
)
...
...
setup.py
View file @
5bdee8f1
...
...
@@ -13,7 +13,7 @@ setup(
keywords
=
'django government parliament votes'
,
install_requires
=
[
'django>1.8,<1.9'
,
'django-representatives>=0.0.2
7
'
,
'django-representatives>=0.0.2
9
'
,
'py-dateutil>=2,<3'
,
'ijson>=2,<3'
,
'pytz'
,
# Always use up-to-date TZ data
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment