parser.rb 1.16 KB
Newer Older
Michael Witrant's avatar
Michael Witrant committed
1
2
3

require "rubygems"
require "bundler/setup"
require 'cgi'
require 'iconv'
require 'uri'
Michael Witrant's avatar
Michael Witrant committed
6

Michael Witrant's avatar
Michael Witrant committed
7
# Link lists known to this script; a list is expected to hold one URL per line.
files = %w(organisations.links public_authorities.links)

# Only the first list is processed.
file = files.first

# Read the URLs, trimming surrounding whitespace (including the "\r" left by
# CRLF line endings) and dropping blank lines, so that every remaining entry
# can be handed to URI.parse.
links = File.readlines(file).map(&:strip).reject(&:empty?)
Michael Witrant's avatar
Michael Witrant committed
12

Michael Witrant's avatar
Michael Witrant committed
13
14
15
16
17
18
19
20
21
22
23
# One answer, grouping every document published under the same name.
#
# name      - display name of the answer
# files     - URLs of the main documents
# annexes   - URLs of the annex documents
# languages - language codes seen for this answer
#
# Built with the block form of Struct.new rather than by inheriting from an
# anonymous struct class, which avoids a pointless intermediate class.
Answer = Struct.new(:name, :files, :annexes, :languages) do
  # Accepts the same positional arguments as the struct; every collection
  # that was not supplied defaults to its own fresh empty array.
  def initialize(*args)
    super
    self.files ||= []
    self.annexes ||= []
    self.languages ||= []
  end
end

# Answers collected so far, keyed by their display name.
answers = {}

# File every link under the answer it belongs to. The PDF base name is split
# on "_": the last part is taken as the language, and the first part starting
# with "annex" (if any) marks the document as an annex.
links.each do |url|
  name = File.basename(URI.parse(url).path, ".pdf")
  name = CGI.unescape(name)
  # Conversion from ISO-8859-15 is currently disabled:
  # name = Iconv.iconv("utf-8", "iso8859-15", name).first

  names = name.split("_")
  language = names.pop

  # Drop the annex marker and everything after it, so that an annex is filed
  # under the same name as its main document. start_with? only looks at the
  # very beginning of the part, unlike a /^.../ pattern which also matches
  # after an embedded newline.
  annex = names.index { |part| part.start_with?("annex") }
  names.slice!(annex..-1) if annex

  # Answers are keyed by name only; languages are accumulated on the answer.
  name = names.map(&:capitalize).join(" ")

  answer = answers[name] ||= Answer.new(name)
  (annex ? answer.annexes : answer.files) << url
  answer.languages << language unless answer.languages.include?(language)
end
Michael Witrant's avatar
Michael Witrant committed
48

Michael Witrant's avatar
Michael Witrant committed
49
50
51
52
53
54
55
56
# Print each answer in name order as "Name (lang1,lang2)", followed by the
# indented file name of every document: main files first, then annexes.
answers.sort.each do |_key, answer|
  languages = answer.languages.join(",")
  puts "#{answer.name} (#{languages})"

  documents = answer.files + answer.annexes
  documents.each do |url|
    puts "  " + File.basename(URI.parse(url).path)
  end
end