Commit ebdbb823 authored by Michael Witrant
Browse files

moved method to a class

parent a8c7e554
...@@ -8,8 +8,8 @@ require 'nokogiri' ...@@ -8,8 +8,8 @@ require 'nokogiri'
require 'erb' require 'erb'
require 'ostruct' require 'ostruct'
options = {} class AmendmentExtractor
def debug(value) def debug(value)
if $DEBUG if $DEBUG
case value case value
when String when String
...@@ -19,29 +19,22 @@ def debug(value) ...@@ -19,29 +19,22 @@ def debug(value)
end end
STDERR.puts output STDERR.puts output
end end
end end
extra_args = cli '--xml-dump' => lambda { |path| options[:xml_dump_path] = path },
'-d --debug' => lambda { $DEBUG = true },
'-1 --one' => lambda { options[:parse_only_one] = true },
'-n --number' => lambda { |num| options[:parse_only_num] = num }
opendocument_path = extra_args.first
raise "usage: #$0 <OpenDocument file>" unless opendocument_path
xml = nil def extract(opendocument_path, options = {})
Zippy.open(opendocument_path) do |zip| xml = nil
Zippy.open(opendocument_path) do |zip|
xml = zip['content.xml'] xml = zip['content.xml']
end end
doc = Nokogiri::XML::Document.parse(xml) doc = Nokogiri::XML::Document.parse(xml)
if options[:xml_dump_path] if options[:xml_dump_path]
File.open(options[:xml_dump_path], "w") { |f| f.write doc.to_xml(indent: 2) } File.open(options[:xml_dump_path], "w") { |f| f.write doc.to_xml(indent: 2) }
end end
styles = {} styles = {}
doc.css("style|style").each do |node| doc.css("style|style").each do |node|
name = node["style:name"] name = node["style:name"]
style = {} style = {}
...@@ -51,15 +44,15 @@ doc.css("style|style").each do |node| ...@@ -51,15 +44,15 @@ doc.css("style|style").each do |node|
end end
styles[name] = style styles[name] = style
end end
text = doc.xpath('//office:text').first text = doc.xpath('//office:text').first
raise "no office:text found" unless text raise "no office:text found" unless text
amend_start = nil amend_start = nil
amend_nodes = [] amend_nodes = []
text.children.each_with_index do |node, i| text.children.each_with_index do |node, i|
if node.search("[text()='<Amend>']").size > 0 if node.search("[text()='<Amend>']").size > 0
amend_start = i amend_start = i
elsif node.search("[text()='</Amend>']").size > 0 elsif node.search("[text()='</Amend>']").size > 0
...@@ -70,13 +63,13 @@ text.children.each_with_index do |node, i| ...@@ -70,13 +63,13 @@ text.children.each_with_index do |node, i|
amend_nodes << text.children.slice(amend_start..amend_end) amend_nodes << text.children.slice(amend_start..amend_end)
amend_start = nil amend_start = nil
end end
end end
debug amendments_found: amend_nodes.length debug amendments_found: amend_nodes.length
amendments = [] amendments = []
amend_nodes.each do |nodes| amend_nodes.each do |nodes|
amend_text = nodes.map(&:text).join amend_text = nodes.map(&:text).join
debug amend_text: amend_text unless options[:parse_only_num] debug amend_text: amend_text unless options[:parse_only_num]
...@@ -84,7 +77,7 @@ amend_nodes.each do |nodes| ...@@ -84,7 +77,7 @@ amend_nodes.each do |nodes|
num_am = amend_doc.xpath("//NumAm").first.text num_am = amend_doc.xpath("//NumAm").first.text
next if options[:parse_only_num] and num_am != options[:parse_only_num] next if options[:parse_only_num] and num_am != options[:parse_only_num].to_s
doc_amend = amend_doc.xpath("//DocAmend").first.text doc_amend = amend_doc.xpath("//DocAmend").first.text
article = amend_doc.xpath("//Article").first.text article = amend_doc.xpath("//Article").first.text
...@@ -133,14 +126,30 @@ amend_nodes.each do |nodes| ...@@ -133,14 +126,30 @@ amend_nodes.each do |nodes|
amendments << amendment amendments << amendment
break if options[:parse_only_one] break if options[:parse_only_one]
end end
template = ERB.new File.read('template.erb'), nil, '-' template = ERB.new File.read('template.erb'), nil, '-'
amendments.each do |amendment| result = []
amendments.each do |amendment|
amendment_binding = OpenStruct.new(amendment).instance_eval { binding } amendment_binding = OpenStruct.new(amendment).instance_eval { binding }
output = template.result(amendment_binding) output = template.result(amendment_binding)
puts output result << output
end
result.join("\n")
end
end end
if $0 == __FILE__
options = {}
extra_args = cli '--xml-dump' => lambda { |path| options[:xml_dump_path] = path },
'-d --debug' => lambda { $DEBUG = true },
'-1 --one' => lambda { options[:parse_only_one] = true },
'-n --number' => lambda { |num| options[:parse_only_num] = num }
opendocument_path = extra_args.first
raise "usage: #$0 <OpenDocument file>" unless opendocument_path
puts AmendmentExtractor.new.extract(opendocument_path, options)
end
...@@ -5,13 +5,15 @@ require 'bundler/setup' ...@@ -5,13 +5,15 @@ require 'bundler/setup'
require 'sinatra' require 'sinatra'
require 'haml' require 'haml'
require './extract_amendments'
get '/' do get '/' do
haml :index haml :index
end end
post '/extract' do post '/extract' do
haml :extract, locals: {result: %x(ruby extract_amendments.rb #{params['file'][:tempfile].path})} result = AmendmentExtractor.new.extract(params['file'][:tempfile].path)
haml :extract, locals: {result: result}
end end
__END__ __END__
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment