Commit fbc3f716 authored by Michael Witrant

save results to yaml

parent 7eb6eeab
# One consultation answer: a display name plus the lists collected for it.
#
# Members:
#   name      - the answer's name
#   files     - list of main document entries
#   annexes   - list of annex document entries
#   languages - list of language entries
#
# The struct is assigned to the constant rather than subclassed
# (`class Answer < Struct.new(...)`), which would leave an extra anonymous
# class in the ancestor chain. The class name, and therefore the
# `!ruby/struct:Answer` YAML tag, is unchanged.
Answer = Struct.new(:name, :files, :annexes, :languages) do
  # Accepts the same positional arguments as the plain struct constructor.
  # Any list member left nil is replaced by a fresh empty array, so callers
  # can append with `<<` right away and no two instances share an array.
  def initialize(*args)
    super
    self.files ||= []
    self.annexes ||= []
    self.languages ||= []
  end
end
This diff is collapsed.
......@@ -3,54 +3,53 @@ require "rubygems"
require "bundler/setup"
require 'cgi'
require 'iconv'
files = %w(organisations.links public_authorities.links)
file = files.first
links = File.read(file).split("\n")
class Answer < Struct.new(:name, :files, :annexes, :languages)
def initialize(*args)
super
self.annexes ||= []
self.files ||= []
self.languages ||= []
require 'answer'
require 'yaml'
files = %w(organisations public_authorities)
files.each do |type|
file = type + ".links"
links = File.read(file).split("\n")
answers = {}
links.each do |url|
name = File.basename(URI.parse(url).path, ".pdf")
name = CGI::unescape(name)
#name = Iconv.iconv("utf-8", "iso8859-15", name).first
names = name.split("_")
language = names.pop
annex = names.index { |part| part =~ /^annex/ }
if annex
names[annex..-1] = []
end
name = names.map(&:capitalize).join(" ")
#name = "#{name} (#{language})"
answer = answers[name] ||= Answer.new(name)
if annex
answer.annexes << url
else
answer.files << url
end
answer.languages << language
answer.languages.uniq!
end
end
answers = {}
links.each do |url|
name = File.basename(URI.parse(url).path, ".pdf")
name = CGI::unescape(name)
#name = Iconv.iconv("utf-8", "iso8859-15", name).first
names = name.split("_")
language = names.pop
annex = names.index { |part| part =~ /^annex/ }
if annex
names[annex..-1] = []
end
name = names.map(&:capitalize).join(" ")
#name = "#{name} (#{language})"
answer = answers[name] ||= Answer.new(name)
if annex
answer.annexes << url
else
answer.files << url
answers.sort.each do |name, answer|
puts "#{answer.name} (#{answer.languages.join(",")})"
(answer.files + answer.annexes).each do |url|
# p answer
name = File.basename(URI.parse(url).path)
puts " " + name
end
end
answer.languages << language
answer.languages.uniq!
end
answers.sort.each do |name, answer|
puts "#{answer.name} (#{answer.languages.join(",")})"
(answer.files + answer.annexes).each do |url|
# p answer
name = File.basename(URI.parse(url).path)
puts " " + name
File.open(type + ".yaml", "w") do |f|
f.puts answers.to_yaml
end
end
---
Uk: !ruby/struct:Answer
name: Uk
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/6436e9a1-1ba6-414e-af8c-d50fa3ac2c52/uk_governement.pdf
annexes: []
languages:
- governement
"": !ruby/struct:Answer
name: ""
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/4edaf82a-d143-4a07-9880-e59529afc595/france.pdf
annexes: []
languages:
- france
Portugal: !ruby/struct:Answer
name: Portugal
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/fcf00a0f-7d50-4423-8101-2deb638cd16e/portugal_pt.pdf
annexes: []
languages:
- pt
Spain Ministry Of Justice: !ruby/struct:Answer
name: Spain Ministry Of Justice
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/8c1ac475-47f7-486f-adc6-d68fddd7a141/spain_ministry_of_justice_es.pdf
annexes: []
languages:
- es
Italy Ministry Of Agrifood: !ruby/struct:Answer
name: Italy Ministry Of Agrifood
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/0c470e92-6585-442f-9d26-e76046744085/italy_ministry_of_agrifood_en.pdf
annexes: []
languages:
- en
Hungary Ministry Public administration And Justice: !ruby/struct:Answer
name: Hungary Ministry Public administration And Justice
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/7668b8f8-3080-4de2-8a90-62c7d6be0e60/hungary_ministry_public_%20administration_and_justice_en.pdf
annexes: []
languages:
- en
Finland Ministry Of Empl And Economy: !ruby/struct:Answer
name: Finland Ministry Of Empl And Economy
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/cce241c1-0810-4d19-b288-5c5f6d8e26d4/finland_ministry_of_empl_and_economy_en.pdf
annexes: []
languages:
- en
Romania: !ruby/struct:Answer
name: Romania
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/919f019f-bb07-443d-801c-f6db3a668134/romania_en.pdf
- https://circabc.europa.eu/d/d/workspace/SpacesStore/046d1325-6cc6-4f8e-aee5-25e354b60f69/romania_ro.pdf
annexes: []
languages:
- en
- ro
Bulgaria: !ruby/struct:Answer
name: Bulgaria
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/3346b4d7-2171-428d-a9af-fe31a3dd67c9/bulgaria_en.pdf
annexes: []
languages:
- en
Ak Oesterreich: !ruby/struct:Answer
name: Ak Oesterreich
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/39852257-57a4-4002-a6df-8b799661c3ea/ak_oesterreich_de.pdf
annexes: []
languages:
- de
Lithuania Ministry Culture: !ruby/struct:Answer
name: Lithuania Ministry Culture
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/d941e7ac-0ad0-4934-b05d-711f816b95e7/lithuania_ministry_culture_lt.pdf
annexes: []
languages:
- lt
Wko Austria: !ruby/struct:Answer
name: Wko Austria
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/df9bf458-a928-43c2-931e-fe722ca1e317/wko_austria_de.pdf
annexes: []
languages:
- de
Italy Office Of The Prime Minister: !ruby/struct:Answer
name: Italy Office Of The Prime Minister
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/71f09778-f7f6-4b49-af15-3dd02d03c42e/italy_office_of_the_prime_minister_it.pdf
annexes: []
languages:
- it
Finnish Commerce Federation: !ruby/struct:Answer
name: Finnish Commerce Federation
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/13b94e78-5719-4892-af14-469f47d8cf2d/finnish_commerce_federation_fi.pdf
annexes: []
languages:
- fi
Belgium: !ruby/struct:Answer
name: Belgium
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/32fe9217-6c5a-4b3b-a954-88e491337f7f/belgium_en.pdf
annexes: []
languages:
- en
Slovakia: !ruby/struct:Answer
name: Slovakia
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/05142029-2e42-43d6-b4b8-0bb42d5e836a/slovakia_sk.pdf
annexes: []
languages:
- sk
Latvia: !ruby/struct:Answer
name: Latvia
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/0f084a74-d6ca-41e2-afe5-97b74f539ea3/latvia_en.pdf
annexes: []
languages:
- en
Danish Chamber Of Commerce: !ruby/struct:Answer
name: Danish Chamber Of Commerce
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/295fb523-cbec-4f7f-b9f1-55ec2715652b/danish_chamber_of_commerce_en.pdf
annexes: []
languages:
- en
Poland Ministry Culture: !ruby/struct:Answer
name: Poland Ministry Culture
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/e4d28106-09a7-4989-8aad-cfbe36fdfc02/poland_ministry_culture_en.pdf
- https://circabc.europa.eu/d/d/workspace/SpacesStore/b32505cb-63b5-4828-a4dd-81ba03fe8a43/poland_ministry_culture_pl.pdf
annexes: []
languages:
- en
- pl
Parti Pirate: !ruby/struct:Answer
name: Parti Pirate
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/54f046e5-c3b3-4b75-8394-0f84d204ae97/parti_pirate_fr.pdf
annexes: []
languages:
- fr
Malta: !ruby/struct:Answer
name: Malta
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/c8884ac2-d625-44e5-a3a7-7e7b98b111ac/malta_en.pdf
annexes: []
languages:
- en
Denmark: !ruby/struct:Answer
name: Denmark
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/f37efa48-d5c4-4b22-8d87-893d50170a70/denmark_en.pdf
annexes: []
languages:
- en
Netherlands Ministry Security Justice: !ruby/struct:Answer
name: Netherlands Ministry Security Justice
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/11bc6214-9b28-4d89-a1f6-58bf0a4e1abd/netherlands_ministry_security_justice_nl.pdf
annexes:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/607ebe73-a7ae-489e-8f14-2dff48521320/netherlands_ministry_security_justice_annex1_nl.pdf
- https://circabc.europa.eu/d/d/workspace/SpacesStore/36934912-7ec6-4f14-aa80-e8780fa56cb0/netherlands_ministry_security_justice_annex2_nl.pdf
languages:
- nl
Ireland: !ruby/struct:Answer
name: Ireland
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/ddc21a57-b983-4ede-a02f-59dd3f2bc8a6/ireland_en.pdf
annexes: []
languages:
- en
Hadopi: !ruby/struct:Answer
name: Hadopi
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/c2cdc614-5073-4306-8293-2c40c9fa84da/hadopi_fr.pdf
annexes: []
languages:
- fr
Czech Republic: !ruby/struct:Answer
name: Czech Republic
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/2c8a3544-c150-4fc0-941b-6496375682b8/czech_republic_en.pdf
annexes: []
languages:
- en
Germany: !ruby/struct:Answer
name: Germany
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/0ef00dda-f4be-4950-bfad-f48baf11cb0c/germany_de.pdf
annexes: []
languages:
- de
European Parliament Committee On Legal Affairs: !ruby/struct:Answer
name: European Parliament Committee On Legal Affairs
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/fa293e2c-cfae-43ed-abcb-59ac2d31a4fb/european_parliament_committee_on_legal_affairs_en.pdf
annexes: []
languages:
- en
Deutscher Bundestag: !ruby/struct:Answer
name: Deutscher Bundestag
files:
- https://circabc.europa.eu/d/d/workspace/SpacesStore/82d024ea-ce88-4974-9122-2bf368f91906/deutscher_bundestag_de.pdf
annexes: []
languages:
- de
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment