Commit 7eb6eeab authored by Michael Witrant

parse annexes and languages

parent e6c73c9c
require "rubygems"
require "bundler/setup"
require 'cgi'
require 'iconv'
# Link list files that this script knows about.
files = ["organisations.links", "public_authorities.links"]
......@@ -8,12 +10,47 @@ file = files.first
# Load the selected link list: one URL per line (trailing empty lines are
# dropped by String#split).
contents = File.read(file)
links = contents.split("\n")
# One answer, identified by +name+, together with the URLs of its main
# +files+, the URLs of its +annexes+ and the +languages+ seen for it.
#
# Defined by assigning the struct to the constant instead of subclassing
# Struct.new(...), which would leave an extra anonymous class in the
# ancestor chain.
Answer = Struct.new(:name, :files, :annexes, :languages) do
  # Accepts the same positional arguments as the plain struct; any of the
  # three collections that is omitted (or nil) starts out as its own empty
  # array so callers can append to it right away.
  def initialize(*args)
    super
    self.annexes ||= []
    self.files ||= []
    self.languages ||= []
  end
end
# Group the links by answer name. For each name we keep the URLs of the main
# documents, the URLs of the annexes and the distinct languages encountered.
answers = {}
links.each do |url|
  # The PDF file name (extension stripped, URL-decoded) is a list of parts
  # joined by underscores; the last part is taken as the language.
  basename = CGI.unescape(File.basename(URI.parse(url).path, ".pdf"))
  # Disabled charset conversion, kept for reference:
  # basename = Iconv.iconv("utf-8", "iso8859-15", basename).first
  parts = basename.split("_")
  language = parts.pop

  # Position of the first part starting with "annex", if any. That part and
  # everything after it is left out of the answer name.
  annex_at = parts.index { |part| part =~ /^annex/ }
  parts = parts.first(annex_at) if annex_at

  title = parts.map(&:capitalize).join(" ")
  p [title, language]
  # Disabled alternative that made the language part of the name:
  # title = "#{title} (#{language})"

  answers[title] ||= Answer.new(title)
  answer = answers[title]
  bucket = annex_at ? answer.annexes : answer.files
  bucket << url
  # Record each language once, in order of first appearance.
  answer.languages << language unless answer.languages.include?(language)
end
# Print the report: answers in name order, each heading showing the answer
# name and its languages, followed by the file name of every document
# (main files first, then annexes).
answers.sort.each do |_key, answer|
  language_list = answer.languages.join(",")
  puts "#{answer.name} (#{language_list})"

  documents = answer.files + answer.annexes
  documents.each do |url|
    puts " " + File.basename(URI.parse(url).path)
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment