Commit e6c73c9c authored by Michael Witrant

parse generated files

parent 242ea49d
......@@ -2,24 +2,14 @@
require "rubygems"
require "bundler/setup"
require "nokogiri"
files = %w(organisations.html public_authorities.html)
files = %w(organisations.links public_authorities.links)
file = files.first
doc = Nokogiri::HTML(File.read(file))
links = doc.css("a").map do |link|
href =link["href"]
href if href =~ /\.pdf$/i
end.compact.uniq { |url| URI.parse(url).path }
parsed_names = []
links = File.read(file).split("\n")
links.each do |url|
name = File.basename(URI.parse(url).path, ".pdf")
next if parsed_names.include?(name)
parsed_names << name
names = name.split("_")
language = names.pop
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment