Commit 9e3cae2b authored by Michael Witrant
parse names

parent aa5d40e5
@@ -12,6 +12,18 @@ doc = Nokogiri::HTML(
# Gather the href of every <a> element whose href ends in ".pdf"
# (case-insensitive); anchors without an href are dropped. Only the first
# link for each distinct URL path is kept.
links = doc.css("a")
  .map { |anchor| anchor["href"] }
  .select { |target| target =~ /\.pdf$/i }
  .uniq { |target| URI.parse(target).path }
# Print a [name, language] pair for each distinct PDF base name found in
# `links`, then print the list of links itself.
#
# The base name is the last path segment of the URL with a trailing ".pdf"
# removed. Its final underscore-separated token is taken as the language and
# the remaining tokens are joined with spaces to form the name.
parsed_names = []
links.each do |url|
  # NOTE(review): the suffix passed to File.basename is matched literally, so
  # an upper-case ".PDF" extension is presumably left in the name — confirm
  # whether that matters for the linked documents.
  name = File.basename(URI.parse(url).path, ".pdf")

  # Skip base names that were already handled (different URLs, same file name).
  next if parsed_names.include?(name)

  parsed_names << name
  names = name.split("_")
  language = names.pop # nil when the base name is empty
  name = names.join(" ")
  p [name, language]
end
p links
