# Collect the href of every <a> element that points at a PDF.
# Hrefs are stripped first because HTML permits surrounding whitespace in the
# attribute, and URI.parse rejects it. `\z` anchors at the true end of the
# string (`$` would also match just before a trailing newline).
pdf_hrefs = doc.css("a")
               .map { |link| link["href"].to_s.strip }
               .select { |href| href =~ /\.pdf\z/i }

# Parse every href exactly once and remember its URI path.
# Hrefs that URI cannot parse, or that carry no path (opaque URIs such as
# "mailto:x.pdf", or a bare "?file=x.pdf" query), are reported on stderr and
# skipped instead of aborting the whole run.
link_paths = pdf_hrefs.each_with_object({}) do |href, paths|
  path = begin
    URI.parse(href).path
  rescue URI::InvalidURIError
    nil
  end

  if path.nil? || path.empty?
    warn "Skipping PDF link without a usable path: #{href.inspect}"
  else
    paths[href] = path
  end
end

# PDF links, unique by URI path; the first occurrence in the document wins.
links = link_paths.keys.uniq { |href| link_paths[href] }

# Base file names without the ".pdf" extension, unique, in document order.
# The extension is removed case-insensitively so it agrees with the
# case-insensitive link filter above ("REPORT_EN.PDF" => "REPORT_EN").
parsed_names = links.map { |url| File.basename(link_paths[url]).sub(/\.pdf\z/i, "") }.uniq

# File names are underscore-separated with the language as the last segment,
# e.g. "some_title_en": print ["Some Title", "en"] for each of them.
parsed_names.each do |basename|
  *words, language = basename.split("_")
  p [words.map(&:capitalize).join(" "), language]
end