Commit ebdbb823 authored by Michael Witrant
Browse files

moved method to a class

parent a8c7e554
......@@ -8,8 +8,8 @@ require 'nokogiri'
require 'erb'
require 'ostruct'
options = {}
def debug(value)
class AmendmentExtractor
def debug(value)
if $DEBUG
case value
when String
......@@ -19,29 +19,22 @@ def debug(value)
end
STDERR.puts output
end
end
extra_args = cli '--xml-dump' => lambda { |path| options[:xml_dump_path] = path },
'-d --debug' => lambda { $DEBUG = true },
'-1 --one' => lambda { options[:parse_only_one] = true },
'-n --number' => lambda { |num| options[:parse_only_num] = num }
opendocument_path = extra_args.first
raise "usage: #$0 <OpenDocument file>" unless opendocument_path
end
xml = nil
Zippy.open(opendocument_path) do |zip|
def extract(opendocument_path, options = {})
xml = nil
Zippy.open(opendocument_path) do |zip|
xml = zip['content.xml']
end
end
doc = Nokogiri::XML::Document.parse(xml)
doc = Nokogiri::XML::Document.parse(xml)
if options[:xml_dump_path]
if options[:xml_dump_path]
File.open(options[:xml_dump_path], "w") { |f| f.write doc.to_xml(indent: 2) }
end
end
styles = {}
doc.css("style|style").each do |node|
styles = {}
doc.css("style|style").each do |node|
name = node["style:name"]
style = {}
......@@ -51,15 +44,15 @@ doc.css("style|style").each do |node|
end
styles[name] = style
end
end
text = doc.xpath('//office:text').first
raise "no office:text found" unless text
text = doc.xpath('//office:text').first
raise "no office:text found" unless text
amend_start = nil
amend_nodes = []
amend_start = nil
amend_nodes = []
text.children.each_with_index do |node, i|
text.children.each_with_index do |node, i|
if node.search("[text()='<Amend>']").size > 0
amend_start = i
elsif node.search("[text()='</Amend>']").size > 0
......@@ -70,13 +63,13 @@ text.children.each_with_index do |node, i|
amend_nodes << text.children.slice(amend_start..amend_end)
amend_start = nil
end
end
end
debug amendments_found: amend_nodes.length
debug amendments_found: amend_nodes.length
amendments = []
amendments = []
amend_nodes.each do |nodes|
amend_nodes.each do |nodes|
amend_text = nodes.map(&:text).join
debug amend_text: amend_text unless options[:parse_only_num]
......@@ -84,7 +77,7 @@ amend_nodes.each do |nodes|
num_am = amend_doc.xpath("//NumAm").first.text
next if options[:parse_only_num] and num_am != options[:parse_only_num]
next if options[:parse_only_num] and num_am != options[:parse_only_num].to_s
doc_amend = amend_doc.xpath("//DocAmend").first.text
article = amend_doc.xpath("//Article").first.text
......@@ -133,14 +126,30 @@ amend_nodes.each do |nodes|
amendments << amendment
break if options[:parse_only_one]
end
end
template = ERB.new File.read('template.erb'), nil, '-'
template = ERB.new File.read('template.erb'), nil, '-'
amendments.each do |amendment|
result = []
amendments.each do |amendment|
amendment_binding = OpenStruct.new(amendment).instance_eval { binding }
output = template.result(amendment_binding)
puts output
result << output
end
result.join("\n")
end
end
# Command-line entry point: this branch runs only when the file is executed
# directly, not when it is loaded via `require`.
if __FILE__ == $PROGRAM_NAME
  settings = {}

  # `cli` is defined outside this block. It is handed a switch => callback map,
  # and the first element of what it returns is used as the document path.
  remaining = cli(
    '--xml-dump' => ->(path) { settings[:xml_dump_path] = path },
    '-d --debug' => -> { $DEBUG = true },
    '-1 --one' => -> { settings[:parse_only_one] = true },
    '-n --number' => ->(num) { settings[:parse_only_num] = num }
  )

  document_path = remaining.first
  raise "usage: #{$PROGRAM_NAME} <OpenDocument file>" unless document_path

  # Print whatever the extractor returns for the given document and options.
  extractor = AmendmentExtractor.new
  puts extractor.extract(document_path, settings)
end
......@@ -5,13 +5,15 @@ require 'bundler/setup'
require 'sinatra'
require 'haml'
require './extract_amendments'
# Root route: responds with the rendered `index` Haml template.
get('/') { haml(:index) }
# POST /extract: runs the amendment extractor on the submitted file and renders
# the `extract` Haml template with the extractor's output as the `result` local.
#
# Reads params['file'][:tempfile] (presumably a multipart upload field named
# `file`; confirm against the form in the index template).
post '/extract' do
  # Extraction runs in-process through AmendmentExtractor. No subprocess is
  # spawned, and the template is rendered exactly once; the last expression
  # is the route's return value.
  upload_path = params['file'][:tempfile].path
  result = AmendmentExtractor.new.extract(upload_path)
  haml :extract, locals: { result: result }
end
__END__
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment