Commit 9e3cae2b authored by Michael Witrant

parse names

parent aa5d40e5
......@@ -12,6 +12,18 @@ doc = Nokogiri::HTML(File.read(file))
# Collect the href of every <a> element in `doc` whose value matches
# /\.pdf$/i; links that do not match yield nil from the block.
links = doc.css("a").map do |link|
href =link["href"]
href if href =~ /\.pdf$/i
# NOTE(review): the next two lines both close the block above, which cannot be
# valid Ruby together. They look like the before/after pair of a diff hunk
# (plain `compact` vs. `compact.uniq`) — confirm which one the real file keeps.
end.compact
# Drops nils, then keeps only the first URL seen for each distinct URI path.
end.compact.uniq { |url| URI.parse(url).path }
# Base names that have already been handled, used to skip repeats.
parsed_names = []
links.each do |url|
# File name portion of the URL path with a trailing ".pdf" removed.
# NOTE(review): the link filter above is case-insensitive but this suffix is
# lower-case only — presumably an upper-case ".PDF" would not be stripped; verify.
name = File.basename(URI.parse(url).path, ".pdf")
next if parsed_names.include?(name)
parsed_names << name
# The last "_"-separated token is taken as the language; the remaining
# tokens are capitalized and joined with spaces to form the name.
names = name.split("_")
language = names.pop
name = names.map(&:capitalize).join(" ")
# Prints one [name, language] pair per distinct base name.
p [name, language]
end
# Prints the full list of collected PDF URLs.
p links
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment