Module: MifParserUtils

Included in:
MifTableParser, MifToHtmlParser
Defined in:
lib/mifparserutils.rb

Constant Summary collapse

NEED_SPACE_BETWEEN_LABEL_AND_NUMBER_REGEX =
Regexp.new('(\s+)(\S+)\n(\s+)%span\.(\S+)_number\n(\s+)(\S+)\n(\s+)(\]?,)', Regexp::MULTILINE)
NEED_SPACE_BETWEEN_LABEL_AND_NUMBER_REGEX_2 =
Regexp.new('(\s+)(\S+)\n(\s+)%span\.(\S+)_number\n(\s+)(\S+)\n', Regexp::MULTILINE)
NEED_SPACE_BETWEEN_LABEL_AND_XREF_REGEX =
Regexp.new('(\s+\S+)\n(\s+)%(span)#(\S+)\.Xref(?:\{ :href => "([^"]+)" \})?\n(\s+)(\S+)\n(\s+)(\]?\)|,.+)', Regexp::MULTILINE)
NEED_SPACE_BETWEEN_LABEL_AND_XREF_REGEX_2 =
Regexp.new('(\s+\S+)\n(\s+)%(span)#(\S+)\.Xref(?:\{ :href => "([^"]+)" \})?\n(\s+)(\S+)\n(\s+)(\]?\)|,)', Regexp::MULTILINE)
COMPRESS_WHITESPACE =
/(Letter|FrameData|Dropcap|SmallCaps|Bold|Italic|\w+_number|PgfNumString_\d|(clause_.+\})|(name.+\})|Abt\d)\n/
COMPRESS_WHITESPACE_2 =
/(^\s*(#|%).+(PgfNumString|\w+_text|PageStart|Number|Xref|Page|Line|STText|Sponsor|AmendmentNumber_PgfTag|Given|Stageheader|Shorttitle))\n/
COMPRESS_WHITESPACE_3 =
/(^\s*(.BillTitle|%a.+\}))\n/
TOGGLE_SHOW_REGEXP =
Regexp.new('(\s+)%span\.ClauseTitle_text<\n(\s+)([^\n]+)\n(\s+)\#(\d+)\.ClauseText', Regexp::MULTILINE)
TOGGLE_SHOW_REGEXP_2 =
Regexp.new('(\s+)%span\.ClauseTitle_text<\n(\s+)([^\n]+)\n(\s+)\#(\d+en)\.ClauseTextWithExplanatoryNote', Regexp::MULTILINE)
COMPRESS_WHITESPACE_4 =
Regexp.new('(%a\{ :name => "[^"]+" \})<>\n(\s+#\d+)', Regexp::MULTILINE)
COMPRESS_WHITESPACE_5 =
Regexp.new('(“\n\s+)(%a\{[^\{]+\})<\n', Regexp::MULTILINE)
AMEND_REF =
Regexp.new('%a.AmendmentReference\{ :href => "([^"]+)" \}<')
LINK_REGEX =
/(\s<a (.+?<\/a>\S(;|\.)?(\s|$)))/
SPAN_REGEX =
/(\s<span (.+?<\/span>\S(;|\.)?(\s|$)))/

Instance Method Summary collapse

Instance Method Details

#clean(element) ⇒ Object


95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/mifparserutils.rb', line 95

# Extracts the text found between a backtick and the last apostrophe in the
# element's text node, then normalises quote escapes and ampersands.
#
# The literal six-character sequences <tt>\xd4 </tt>, <tt>\xd5 </tt>,
# <tt>\xd2 </tt> and <tt>\xd3 </tt> (backslash, "x", two hex digits, space)
# are replaced with the corresponding curly quote characters.
#
# @param element [#at] node whose 'text()' child holds the quoted value
# @return [String] the cleaned text, or an empty string when the text node
#   contains no backtick/apostrophe delimited value
def clean element
  # String#[] with a capture index returns a fresh String (or nil), so the
  # in-place gsub! calls below never touch the element's own text.
  text = element.at('text()').to_s[/`(.+)'/, 1]

  # Previously a non-matching node fell through and returned nil even though
  # the (discarded) else branch clearly meant to produce ''.
  return '' unless text

  text.gsub!('\xd4 ', '‘')
  text.gsub!('\xd5 ', '’')
  text.gsub!('\xd2 ', '“')
  text.gsub!('\xd3 ', '”')
  # Decode first, then encode, so an already-escaped ampersand is not
  # escaped a second time.
  text.gsub!('&amp;', '&')
  text.gsub!('&', '&amp;')
  text
end

#for_each_match(pattern, text) ⇒ Object


22
23
24
25
26
27
28
29
30
# File 'lib/mifparserutils.rb', line 22

# Yields every match of +pattern+ in +text+ to the given block.
#
# All matches are gathered into an array before the first yield, so the
# block is free to modify +text+ while the matches are being visited.
#
# @param pattern [Regexp, String] pattern handed to String#scan
# @param text [String] string to search
# @yield [match] each scan result (an array of groups when the pattern has
#   capture groups, otherwise the matched string)
# @return [Array] the collected matches
def for_each_match pattern, text
  text.scan(pattern).each { |found| yield found }
end

#format_haml(haml, clauses_file_name = nil) ⇒ Object


63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/mifparserutils.rb', line 63

# Rewrites generated Haml text through a fixed sequence of regex and string
# substitutions and returns the result.
#
# The first step uses the non-destructive +gsub+, so the caller's string is
# left untouched; every later step mutates that copy in place. The steps are
# order-dependent (later patterns match the "<>" / "<" markers that earlier
# steps append), so they should not be reordered.
#
# @param haml [String] Haml source text to rewrite
# @param clauses_file_name [String, nil] when given, every AMEND_REF link is
#   re-pointed at the localhost convert URL for this file, with the original
#   href value appended
# @return [String] the rewritten Haml
def format_haml haml, clauses_file_name=nil
  # Fold "label / %span.X_number / content" line triples into a single line
  # of the form: label <span class="X_number">content</span>
  # so that a space separates the label from the number.
  haml = haml.gsub(NEED_SPACE_BETWEEN_LABEL_AND_NUMBER_REGEX,  '\1\2 <span class="\4_number">\6</span>\8')
  haml.gsub!(NEED_SPACE_BETWEEN_LABEL_AND_NUMBER_REGEX_2,  '\1\2 <span class="\4_number">\6</span>' + "\n")

  # Same folding for "%span#ID.Xref" lines. The optional :href capture (\5)
  # is matched but not carried into the replacement.
  haml.gsub!(NEED_SPACE_BETWEEN_LABEL_AND_XREF_REGEX, '\1 <span class="Xref" id="\4">\7</span>\9')
  haml.gsub!(NEED_SPACE_BETWEEN_LABEL_AND_XREF_REGEX_2, '\1 <span class="Xref" id="\4">\7</span>\9')

  # Append Haml whitespace-removal markers ("<>" or "<") to the end of lines
  # matched by the COMPRESS_WHITESPACE patterns.
  haml.gsub!(COMPRESS_WHITESPACE, '\1' + "<>\n")
  haml.gsub!(COMPRESS_WHITESPACE_2, '\1' + "<\n")
  haml.gsub!(COMPRESS_WHITESPACE_3, '\1' + "<\n")

  # For a clause title followed by a "#N.ClauseText" (or
  # "#Nen.ClauseTextWithExplanatoryNote") element: insert a link_to_function
  # arrow image before the title and turn the title text itself into a
  # link_to_function call. Both links toggle the element with that id and
  # call imgswap on the image.
  haml.gsub!(TOGGLE_SHOW_REGEXP,   '\1= link_to_function \'<img alt="" id="\5_img" src="/images/down-arrow.png">\', "$(\'#\5\').toggle();imgswap(\'\5_img\')"' + "" + '\1%span.ClauseTitle_text<' + "\n" + '\2= link_to_function "\3", "$(\'#\5\').toggle();imgswap(\'\5_img\')"' + "\n" + '\4#\5.ClauseText')
  haml.gsub!(TOGGLE_SHOW_REGEXP_2, '\1= link_to_function \'<img alt="" id="\5_img" src="/images/down-arrow.png">\', "$(\'#\5\').toggle();imgswap(\'\5_img\')"' + "" + '\1%span.ClauseTitle_text<' + "\n" + '\2= link_to_function "\3", "$(\'#\5\').toggle();imgswap(\'\5_img\')"' + "\n" + '\4#\5.ClauseTextWithExplanatoryNote')

  # Drop the "<>" marker from a named anchor when the next line is a
  # numeric-id element.
  haml.gsub!(COMPRESS_WHITESPACE_4, '\1' + "\n" + '\2')
  # Upgrade "<" to "<>" on an anchor line that directly follows a line
  # ending in an opening double quote.
  haml.gsub!(COMPRESS_WHITESPACE_5, '\1\2<>' + "\n" )

  if clauses_file_name
    # NOTE(review): URI.encode is obsolete and was removed in Ruby 3.0; this
    # branch will raise NoMethodError on modern Rubies - confirm the target
    # Ruby version before relying on it.
    link = '%a.AmendmentReference{ :href => "http://localhost:3000/convert?file=' + URI.encode(clauses_file_name) + '\1" }<'
    haml.gsub!(AMEND_REF, link)
  end
  # Replace the literal text "\&nbsp; " with a backslash followed by a space.
  haml.gsub!('\&nbsp; ','\ ')
  # Wrap a line holding only an escaped character in "%span<>" with "\." as
  # its child line.
  # NOTE(review): the pattern's "." is unescaped, so it matches a backslash
  # followed by ANY character, yet the replacement always writes "\." -
  # confirm that only "\." lines are expected here.
  haml.gsub!(/(\s+)\\.\n/,'\1%span<>' + '\1  \.' + "\n")
  haml.gsub!(';}<>', ';}')
  # Backslash-escape a line consisting solely of an indented "."
  # (presumably so Haml does not parse it as class shorthand - TODO confirm).
  haml.gsub!(/^(\s+)\.$/,'\1\\.')
  
  # Convert the :style => "trim_outside_whitespace" placeholder attribute
  # into Haml's ">" marker, then collapse the resulting "}><>" to "}><".
  # (The placeholder presumably originates from #preprocess - verify.)
  haml.gsub!(', :style => "trim_outside_whitespace" }', ' }>')
  haml.gsub!('{ :style => "trim_outside_whitespace" }', '>')
  haml.gsub!('}><>','}><')
  haml
end

#get_attributes(element, includes = nil) ⇒ Object


147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/mifparserutils.rb', line 147

# Builds an HTML attribute string (each entry formatted as ` name="value"`)
# from the Attribute nodes found at '../Attributes/Attribute' relative to
# +element+.
#
# Attribute names are passed through #clean and, when they contain a dot,
# reduced to the part after the first dot.
#
# @param element [#/] node to search from; when @e_tag is 'Clauses.ar' the
#   search starts from its 'Attributes' child set instead
# @param includes [#include?, nil] optional whitelist of attribute names;
#   a blank value lets every attribute through
# @return [String] concatenated attributes, or '' when none are found
def get_attributes element, includes=nil
  element = element / 'Attributes' if @e_tag == 'Clauses.ar'
  nodes = element / '../Attributes/Attribute'
  rendered = ''
  return rendered unless nodes && nodes.size > 0

  nodes.each do |node|
    name = clean(node.at('AttrName'))
    # Keep only what follows the first dot, if there is one.
    after_dot = name[/\.(.+)/, 1]
    name = after_dot if after_dot
    value = clean(node.at('AttrValue'))
    next unless includes.blank? || includes.include?(name)
    rendered += %Q| #{name}="#{value}"|
  end
  rendered
end

#get_char(element) ⇒ Object


111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/mifparserutils.rb', line 111

# Translates the character name held in the element's text node into the
# text that should be emitted for it.
#
# @param element [#at] node whose 'text()' child names the character
# @return [String] the replacement text; an unrecognised name is returned
#   wrapped in double square brackets, e.g. "[[Name]]"
def get_char element
  name = element.at('text()').to_s
  # Built per call so each caller receives its own mutable strings.
  substitutions = {
    'NoHyphen'   => '',
    'SoftHyphen' => '-',
    'EmSpace'    => ' ',
    'EnSpace'    => ' ',
    'Pound'      => '£',
    'EmDash'     => '—',
    # NOTE(review): this is the two characters "/" and "n", not a newline -
    # confirm whether "\n" was intended.
    'HardReturn' => "/n",
    'HardSpace'  => " ",
    'Tab'        => ' '
  }
  substitutions.fetch(name) { '[[' + name + ']]' }
end

#get_uid(element) ⇒ Object


166
167
168
# File 'lib/mifparserutils.rb', line 166

# Reads the text found at '../Unique/text()' relative to +element+.
#
# @param element [#at] node to look up from
# @return [String] the text as a string ('' when the lookup yields nil)
def get_uid element
  unique_text = element.at('../Unique/text()')
  unique_text.to_s
end

#make_attr(text) ⇒ Object


59
60
61
# File 'lib/mifparserutils.rb', line 59

# Converts Ruby-hash style attribute text (`:key => "value", :other => ...`)
# into `key="value" other=...` form.
#
# @param text [String] attribute text to convert (not modified)
# @return [String] converted text with surrounding whitespace stripped
def make_attr text
  with_equals = text.gsub(' => ', '=')
  # Every ", :" separator becomes a single space ...
  separated = with_equals.gsub(', :', ' ')
  # ... and only the first remaining " :" loses its colon.
  separated.sub(' :', ' ').strip
end

#postprocess(text) ⇒ Object


53
54
55
56
57
# File 'lib/mifparserutils.rb', line 53

# Replaces the "&nbsp;" placed directly before an opening <a or <span tag
# with an ordinary space. The string is modified in place.
#
# @param text [String] markup to fix up (mutated)
# @return [String] the same +text+ object
def postprocess text
  %w[a span].each do |tag|
    text.gsub!("&nbsp;<#{tag}", " <#{tag}")
  end
  text
end

#preprocess(text) ⇒ Object


34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/mifparserutils.rb', line 34

# Marks <a> and <span> tags matched by LINK_REGEX / SPAN_REGEX: the space
# before the tag becomes "&nbsp;" and a
# style='trim_outside_whitespace' attribute is inserted as the tag's first
# attribute. The string is modified in place.
#
# @param text [String] markup to scan (mutated)
# @return [String] the same +text+ object
def preprocess text
  for_each_match(LINK_REGEX, text) do |groups|
    tail = groups[1]
    # When the match spans several links, keep only what follows the last
    # "<a " - but ignore the case where the only "<a" occurrences are part of
    # back-to-back "</a><a" joins.
    if tail.gsub('</a><a', '').rindex('<a')
      tail = tail[(tail.rindex('<a') + 3)..-1]
    end
    text.sub!(" <a #{tail}", "&nbsp;<a style='trim_outside_whitespace' #{tail}")
  end

  for_each_match(SPAN_REGEX, text) do |groups|
    tail = groups[1]
    last_open = tail.rindex('<span')
    # Same trimming for spans: continue after the last "<span ".
    tail = tail[(last_open + 6)..-1] if last_open
    text.sub!(" <span #{tail}", "&nbsp;<span style='trim_outside_whitespace' #{tail}")
  end

  text
end

#start_tag(tag, element) ⇒ Object


137
138
139
140
141
142
143
144
145
# File 'lib/mifparserutils.rb', line 137

# Builds the opening tag for +element+, carrying its unique id and
# attributes, and appends any pending @suffix.
#
# A truthy @suffix is consumed: it is appended once and then reset to nil.
#
# @param tag [#to_s] tag name
# @param element node passed to #get_attributes and #get_uid
# @return [String] e.g. `<tag id="UID" attr="value">` plus the suffix, if any
def start_tag tag, element
  attrs = get_attributes(element)
  opening = %Q|<#{tag} id="#{get_uid(element)}"#{attrs}>|
  return opening unless @suffix

  trailer = @suffix.to_s
  @suffix = nil
  opening + trailer
end