an-deputes.sh 3.62 KB
Newer Older
Bastien Le Querrec's avatar
Bastien Le Querrec committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env bash

# Extract data from the JSON files here:
# http://data.assemblee-nationale.fr/acteurs/deputes-en-exercice to data/an/
#
# Every path used by this script is relative to the directory that contains
# the script itself, so it can be launched from any working directory.

# Stop at the first command that fails outside of a condition.
set -e

# Quoted (and protected with `--`) so that a checkout path containing spaces,
# glob characters or a leading dash does not break the directory change.
cd -- "$(dirname -- "$0")"

#######################################
# Normalise one raw phone number.
# Removes spaces and dots, drops the first "(0)", turns a leading "00" into
# "+", and rewrites a leading 0590/0596/0594/0262/0508 to +590/+596/+594/
# +262/+508 and a leading 0269 to +262269.
# Arguments: $1 - raw phone number
# Outputs:   the normalised number on stdout
#######################################
normalize_phone() {
  printf '%s\n' "$1" \
    | tr -d ' .' \
    | sed -e 's/(0)//' \
      -e 's/^00/+/' \
      -e 's/^0590/+590/' \
      -e 's/^0596/+596/' \
      -e 's/^0594/+594/' \
      -e 's/^0262/+262/' \
      -e 's/^0508/+508/' \
      -e 's/^0269/+262269/'
}

#######################################
# Remove every literal occurrence of some numbers from a phone list.
# The numbers are matched as plain text (never as a pattern), and several
# numbers may be given, one per line.
# Arguments: $1 - newline-separated phone list
#            $2 - newline-separated numbers to remove
# Outputs:   the filtered list on stdout (emptied lines are kept as blanks)
#######################################
strip_numbers() {
  local list=$1
  local number
  while IFS= read -r number; do
    [[ -n "${number}" ]] || continue
    # Quoting the pattern makes the substitution literal.
    list=${list//"${number}"/}
  done <<< "$2"
  printf '%s\n' "${list}"
}

#######################################
# Print each non-empty line of a value as a double-quoted YAML list item.
# Arguments: $1 - newline-separated values
# Outputs:   one `- "value"` line per non-empty input line
#######################################
yaml_list_items() {
  local item
  while IFS= read -r item; do
    [[ -n "${item}" ]] || continue
    printf -- '- "%s"\n' "${item}"
  done <<< "$1"
}

#######################################
# Convert one acteur JSON file into ../data/an/<name>.yml and fetch the
# matching photo into ../images/an/ when it is not there yet.
# Arguments: $1 - acteur key (file name without directory and ".json")
# Returns:   non-zero when the photo download fails; with `set -e` active any
#            failing plain jq lookup also aborts the script.
#######################################
process_acteur() {
  local key=$1
  local source_file="data/an/json/acteur/${key}.json"
  local first_name last_name email phone_raw phone phone_chamber twitter
  local commission_refs commissions county group_ref group photo filename
  local output_file photo_file raw ref label

  printf 'Parsing %s...\n' "${key}"

  first_name=$(jq -r .acteur.etatCivil.ident.prenom "${source_file}")
  last_name=$(jq -r .acteur.etatCivil.ident.nom "${source_file}")

  # Address entries of type "15" are stored as e-mail, in reverse file order.
  email=$(jq -r '.acteur.adresses.adresse | map(. | select(.type=="15")) | .[].valElec' "${source_file}" | tac)

  # Address entries of type "11" are stored as phone numbers, in file order.
  # `|| true`: like the other address lookups (whose pipelines hide a jq
  # failure), a jq error here must not abort the whole run.
  phone_raw=$(jq -r '.acteur.adresses.adresse | map(. | select(.type=="11")) | .[].valElec' "${source_file}") || true

  phone=""
  while IFS= read -r raw; do
    [[ -n "${raw}" ]] || continue
    phone+="$(normalize_phone "${raw}")"$'\n'
  done <<< "${phone_raw}"

  # The name is handed to jq as data (--arg) instead of being pasted into the
  # jq program, so quotes or backslashes in a name cannot break the filter.
  phone_chamber=$(jq -r --arg name "${first_name} ${last_name}" \
    'map(select(.name == $name)) | .[].phone' "an-deputes-phones.json" | tr -d ' .')

  if [[ -n "${phone_chamber}" ]]; then
    # Put the number(s) from an-deputes-phones.json first and avoid listing
    # them twice; the remaining numbers are sorted.
    phone=$(strip_numbers "${phone}" "${phone_chamber}" | sort)
    phone="${phone_chamber}"$'\n'"${phone}"
  fi

  # First address entry of type "24", without its first "@".
  twitter=$(jq -r '.acteur.adresses.adresse | map(. | select(.type=="24")) | .[].valElec' "${source_file}" | sed 's/@//' | head -n 1)

  commission_refs=$(jq -r '.acteur.mandats[] | map(. | select(.typeOrgane=="COMPER" or .typeOrgane=="COMNL")) | .[].organes.organeRef' "${source_file}" | sort -u)

  # Resolve each organe reference to its label.
  commissions=""
  while IFS= read -r ref; do
    [[ -n "${ref}" ]] || continue
    label=$(jq -r .organe.libelle "data/an/json/organe/${ref}.json")
    commissions+="${label}"$'\n'
  done <<< "${commission_refs}"

  county=$(jq -r '.acteur.mandats[] | map(. | select(.typeOrgane=="ASSEMBLEE")) | .[].election.lieu.departement' "${source_file}" | head -n 1)

  group_ref=$(jq -r '.acteur.mandats[] | map(. | select(.typeOrgane=="GP")) | .[].organes.organeRef' "${source_file}" | head -n 1)
  group=$(jq -r .organe.libelle "data/an/json/organe/${group_ref}.json")

  # The photo id is the key without its first "PA" (case-insensitive).
  photo=$(printf '%s\n' "${key}" | sed 's/PA//i')

  # File name: "<last> <first>" transliterated to ASCII, lower-cased, with
  # spaces replaced by underscores.
  filename=$(printf '%s\n' "${last_name} ${first_name}" \
    | iconv -f utf8 -t ascii//TRANSLIT \
    | awk '{print tolower($0)}' \
    | tr " " "_")

  output_file="../data/an/${filename}.yml"
  {
    printf 'id: %s\n' "${key}"
    printf 'last_name: %s\n' "${last_name}"
    printf 'first_name: %s\n' "${first_name}"
    printf 'group: %s\n' "${group}"
    printf 'county: %s\n' "${county}"
    printf 'commissions:\n'
    yaml_list_items "${commissions}"
    printf 'phone:\n'
    yaml_list_items "${phone}"
    printf 'email:\n'
    yaml_list_items "${email}"
    printf 'twitter: %s\n' "${twitter}"
    printf 'photo: %s\n' "${photo}"
  } > "${output_file}"

  photo_file="../images/an/${photo}.jpg"
  # -s rather than -f: an empty file left by an earlier failed download is
  # fetched again instead of being mistaken for a valid photo.
  if [[ ! -s "${photo_file}" ]]; then
    # wget -O creates its target before the transfer starts, so download to a
    # temporary name and only rename it once the transfer succeeded.
    if ! wget --quiet "http://www2.assemblee-nationale.fr/static/tribun/15/photos/${photo}.jpg" -O "${photo_file}.part"; then
      rm -f -- "${photo_file}.part"
      printf 'Failed to download the photo of %s\n' "${key}" >&2
      return 1
    fi
    mv -- "${photo_file}.part" "${photo_file}"
  fi
}

# Process every acteur JSON file; the key is the path below
# data/an/json/acteur/ without the ".json" suffix. Files that are not *.json
# are ignored.
while IFS= read -r acteur_path; do
  acteur_key=${acteur_path#data/an/json/acteur/}
  acteur_key=${acteur_key%.json}
  process_acteur "${acteur_key}"
done < <(find data/an/json/acteur/ -type f -name '*.json')