extract_amendments.rb 1.5 KB
Newer Older
Michael Witrant's avatar
Michael Witrant committed
1 2 3 4 5
#!/usr/bin/env ruby

require 'rubygems'
require 'bundler/setup'
require 'zippy'
Michael Witrant's avatar
Michael Witrant committed
6
require 'clik'
Michael Witrant's avatar
Michael Witrant committed
7
require 'nokogiri'
Michael Witrant's avatar
Michael Witrant committed
8

Michael Witrant's avatar
Michael Witrant committed
9
# Destination path for the optional XML dump; stays nil unless the
# --xml-dump option handler assigns it.
# It is declared before the option lambdas so that the handler's assignment
# updates this variable instead of creating a new block-local one.
xml_dump_path = nil
Michael Witrant's avatar
Michael Witrant committed
10
# Debug output hook. By default it ignores its argument and returns nil;
# the -d/--debug option handler redefines it to print the values with `p`.
def debug(values)
  # Intentionally empty: debugging is off unless requested.
end
Michael Witrant's avatar
Michael Witrant committed
11

Michael Witrant's avatar
Michael Witrant committed
12 13
# Declare the command-line options through `cli`:
#   --xml-dump PATH  remember PATH so the parsed document can be dumped there
#   -d, --debug      redefine `debug` so that it prints its argument with `p`
# The return value is kept in extra_args; presumably these are the arguments
# left over once the options are consumed (verify against the clik docs).
extra_args = cli(
  '--xml-dump' => ->(path) { xml_dump_path = path },
  '-d --debug' => -> { def debug(values) p values; end }
)
Michael Witrant's avatar
Michael Witrant committed
14

Michael Witrant's avatar
Michael Witrant committed
15
# The OpenDocument file to process is the first element of what `cli`
# returned; it is nil when no such element exists.
opendocument_path = extra_args.first
Michael Witrant's avatar
Michael Witrant committed
16 17 18 19 20 21 22
# Refuse to continue without an input file.
raise "usage: #$0 <OpenDocument file>" unless opendocument_path

# Pull the content.xml entry out of the archive. The variable is declared
# outside the block so the value survives once the block has finished.
xml = nil
Zippy.open(opendocument_path) do |zip|
  xml = zip['content.xml']
end

# If the lookup produced nothing, stop here with a message naming the real
# problem instead of handing nil to the XML parser and failing later.
raise "no content.xml found in #{opendocument_path}" unless xml

Michael Witrant's avatar
Michael Witrant committed
23 24
# Parse the extracted content.xml data into a Nokogiri XML document.
doc = Nokogiri::XML::Document.parse(xml)

Michael Witrant's avatar
Michael Witrant committed
25
# When --xml-dump supplied a path, write the parsed document there,
# re-serialized with a 2-space indent.
if xml_dump_path
  File.open(xml_dump_path, "w") do |dump_file|
    dump_file.write(doc.to_xml(indent: 2))
  end
end
Michael Witrant's avatar
Michael Witrant committed
28 29 30 31 32


# Take the first office:text element of the document; the rest of the
# script cannot work without it, so abort when there is none.
text = doc.at_xpath('//office:text')
raise "no office:text found" if text.nil?

Michael Witrant's avatar
Michael Witrant committed
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
# Collect the amendments found among the direct children of office:text.
# An amendment is the run of children from one whose subtree matches the
# literal text "<Amend>" through the next one matching "</Amend>",
# both inclusive. Each run is appended to amend_nodes.
#
# Raises RuntimeError when the markers are unbalanced: a closing marker
# without an opening one, a second opening marker before the previous
# amendment was closed, or an opening marker that is never closed.
amend_start = nil
amend_nodes = []

# Fetch the child list once and reuse it for both iteration and slicing.
children = text.children

children.each_with_index do |child, index|
  if child.search("[text()='<Amend>']").size > 0
    # Previously a second opening marker silently replaced the first one,
    # losing the earlier amendment without any notice.
    raise "amend start before previous amend end" unless amend_start.nil?
    amend_start = index
  elsif child.search("[text()='</Amend>']").size > 0
    raise "amend end before amend start" if amend_start.nil?
    amend_nodes << children.slice(amend_start..index)
    amend_start = nil
  end
end

# An opening marker still pending here means the last amendment never
# ended; report it instead of silently dropping it.
raise "amend start without amend end" unless amend_start.nil?

Michael Witrant's avatar
Michael Witrant committed
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
# Report how many amendments were collected.
puts "#{amend_nodes.length} amendments found"

# For each amendment, join the text of its nodes, parse the result as XML
# and read the NumAm, DocAmend and Article elements. The values are only
# passed to `debug` here.
#
# Raises RuntimeError naming the element when one of the three is missing,
# instead of failing with a NoMethodError on nil.
amend_nodes.each do |nodes|
  amend_text = nodes.map(&:text).join
  debug amend_text: amend_text

  amend_doc = Nokogiri::XML::Document.parse(amend_text)

  # Look up each required element the same way; the first match wins.
  num_am, doc_amend, article = %w[NumAm DocAmend Article].map do |tag|
    element = amend_doc.at_xpath("//#{tag}")
    raise "no #{tag} found in amendment" unless element
    element.text
  end

  debug num_am: num_am, doc_amend: doc_amend, article: article
end

Michael Witrant's avatar
Michael Witrant committed
64