extract_amendments.rb 1.86 KB
Newer Older
Michael Witrant's avatar
Michael Witrant committed
1 2 3 4 5
#!/usr/bin/env ruby

require 'rubygems'
require 'bundler/setup'
require 'zippy'
Michael Witrant's avatar
Michael Witrant committed
6
require 'clik'
Michael Witrant's avatar
Michael Witrant committed
7
require 'nokogiri'
Michael Witrant's avatar
Michael Witrant committed
8

Michael Witrant's avatar
Michael Witrant committed
9
xml_dump_path = nil
10 11 12
# Default debug hook: accepts one value and deliberately does nothing.
# The -d/--debug option handler redefines it to print its argument with p.
def debug(values)
  nil
end
parse_only_one = false  
Michael Witrant's avatar
Michael Witrant committed
13

Michael Witrant's avatar
Michael Witrant committed
14
# Parse command-line options with the cli helper (presumably provided by the
# clik gem required above -- confirm). The handlers set xml_dump_path, redefine
# debug, or set parse_only_one. The return value is used below as the list of
# remaining arguments: its first element is taken as the input file path.
extra_args = cli '--xml-dump' => lambda { |path| xml_dump_path = path },
15 16
                 '-d --debug' => lambda { def debug(values) p values; end },
                 '-1 --one'   => lambda { parse_only_one = true }
Michael Witrant's avatar
Michael Witrant committed
17

Michael Witrant's avatar
Michael Witrant committed
18
opendocument_path = extra_args.first
Michael Witrant's avatar
Michael Witrant committed
19 20 21 22 23 24 25
# The input file path is mandatory; fail with a usage message without it.
unless opendocument_path
  raise "usage: #$0 <OpenDocument file>"
end

# Fetch the content.xml entry from the archive. xml is declared before the
# block so the value assigned inside it remains visible afterwards.
xml = nil
Zippy.open(opendocument_path) { |archive| xml = archive['content.xml'] }

Michael Witrant's avatar
Michael Witrant committed
26 27
doc = Nokogiri::XML::Document.parse(xml)

Michael Witrant's avatar
Michael Witrant committed
28
if xml_dump_path
Michael Witrant's avatar
Michael Witrant committed
29
  # Write the parsed document, serialized with a 2-space indent, to the dump path.
  File.write(xml_dump_path, doc.to_xml(indent: 2))
Michael Witrant's avatar
Michael Witrant committed
30
end
Michael Witrant's avatar
Michael Witrant committed
31 32 33 34 35


# Locate the first office:text element; the script cannot continue without it.
text = doc.at_xpath('//office:text')
raise "no office:text found" if text.nil?

Michael Witrant's avatar
Michael Witrant committed
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
# Walk the direct children of office:text looking for nodes that match the
# literal "<Amend>" and "</Amend>" marker texts. Every start/end pair yields one
# slice of sibling nodes (both marker nodes included) appended to amend_nodes.
amend_nodes = []
amend_start = nil
text_children = text.children

text_children.each_with_index do |child, index|
  if !child.search("[text()='<Amend>']").empty?
    # Remember where the current amendment begins.
    amend_start = index
  elsif !child.search("[text()='</Amend>']").empty?
    raise "amend end before amend start" if amend_start.nil?

    amend_nodes << text_children.slice(amend_start..index)
    amend_start = nil
  end
end
# NOTE(review): a start marker that is never followed by an end marker is
# silently dropped here; confirm whether that should be reported.

Michael Witrant's avatar
Michael Witrant committed
52 53
puts "#{amend_nodes.length} amendments found"

Michael Witrant's avatar
Michael Witrant committed
54 55
amendments = []

Michael Witrant's avatar
Michael Witrant committed
56 57 58 59 60 61 62 63 64 65
amend_nodes.each do |nodes|
  # The concatenated text of the collected nodes is itself parsed as XML below.
  amend_text = nodes.map(&:text).join
  debug amend_text: amend_text

  amend_doc = Nokogiri::XML::Document.parse(amend_text)

  # Read the text of the first NumAm, DocAmend and Article elements. A missing
  # element raises an error naming the tag, rather than failing with a
  # NoMethodError on nil that hides which field was absent.
  num_am, doc_amend, article = %w[NumAm DocAmend Article].map do |tag|
    element = amend_doc.at_xpath("//#{tag}")
    raise "no #{tag} found in amendment" unless element

    element.text
  end
  
Michael Witrant's avatar
Michael Witrant committed
66 67 68 69 70 71
  # Gather the extracted fields into one record and pass it to the debug hook.
  amendment = { num_am: num_am, doc_amend: doc_amend, article: article }
  debug(amendment)
Michael Witrant's avatar
Michael Witrant committed
72 73 74 75 76 77 78 79
  
  # Exactly one table|table element must be found under the amendment's nodes;
  # zero or several is an error.
  tables = nodes.css('table|table')
  case tables.length
  when 0 then raise "amendment table not found"
  when 1 then table = tables.first
  else raise "too many tables"
  end
  
  
  
Michael Witrant's avatar
Michael Witrant committed
80
  amendments << amendment
81 82
  
  break if parse_only_one
Michael Witrant's avatar
Michael Witrant committed
83 84
end