Class: HtmlCompressor::Compressor

Inherits:
Object
  • Object
show all
Defined in:
lib/htmlcompressor/compressor.rb

Constant Summary collapse

JS_COMPRESSOR_YUI =
"yui"
JS_COMPRESSOR_CLOSURE =
"closure"
PHP_TAG_PATTERN =

Predefined pattern that matches <?php ... ?> tags. Could be passed inside a list to #setPreservePatterns(List) setPreservePatterns method.

/<\?php.*?\?>/im
SERVER_SCRIPT_TAG_PATTERN =

Predefined pattern that matches <% ... %> tags. Could be passed inside a list to the #setPreservePatterns(List) method.

/<%.*?%>/m
SERVER_SIDE_INCLUDE_PATTERN =

Predefined pattern that matches <!--# ... --> tags. Could be passed inside a list to the #setPreservePatterns(List) method.

/<!--\s*#.*?-->/m
BLOCK_TAGS_MIN =

Predefined list of tags that are very likely to be block-level. Could be passed to #setRemoveSurroundingSpaces(String) setRemoveSurroundingSpaces method.

"html,head,body,br,p"
BLOCK_TAGS_MAX =

Predefined list of tags that are block-level by default, excluding <div> and <li> tags. Table tags are also included. Could be passed to the #setRemoveSurroundingSpaces(String) method.

BLOCK_TAGS_MIN + ",h1,h2,h3,h4,h5,h6,blockquote,center,dl,fieldset,form,frame,frameset,hr,noframes,ol,table,tbody,tr,td,th,tfoot,thead,ul"
ALL_TAGS =

Could be passed to #setRemoveSurroundingSpaces(String) setRemoveSurroundingSpaces method to remove all surrounding spaces (not recommended).

"all"
TEMP_COND_COMMENT_BLOCK =

temp replacements for preserved blocks

"%%%~COMPRESS~COND~{0,number,#}~%%%"
TEMP_PRE_BLOCK =
"%%%~COMPRESS~PRE~{0,number,#}~%%%"
TEMP_TEXT_AREA_BLOCK =
"%%%~COMPRESS~TEXTAREA~{0,number,#}~%%%"
TEMP_SCRIPT_BLOCK =
"%%%~COMPRESS~SCRIPT~{0,number,#}~%%%"
TEMP_STYLE_BLOCK =
"%%%~COMPRESS~STYLE~{0,number,#}~%%%"
TEMP_EVENT_BLOCK =
"%%%~COMPRESS~EVENT~{0,number,#}~%%%"
TEMP_LINE_BREAK_BLOCK =
"%%%~COMPRESS~LT~{0,number,#}~%%%"
TEMP_SKIP_BLOCK =
"%%%~COMPRESS~SKIP~{0,number,#}~%%%"
TEMP_USER_BLOCK =
"%%%~COMPRESS~USER{0,number,#}~{1,number,#}~%%%"
EMPTY_PATTERN =

compiled regex patterns

Regexp.new("\\s")
SKIP_PATTERN =
Regexp.new("<!--\\s*\\{\\{\\{\\s*-->(.*?)<!--\\s*\\}\\}\\}\\s*-->", Regexp::MULTILINE | Regexp::IGNORECASE)
COND_COMMENT_PATTERN =
Regexp.new("(<!(?:--)?\\[[^\\]]+?\\]>)(.*?)(<!\\[[^\\]]+\\]-->)", Regexp::MULTILINE | Regexp::IGNORECASE)
COMMENT_PATTERN =
Regexp.new("<!---->|<!--[^\\[].*?-->", Regexp::MULTILINE | Regexp::IGNORECASE)
INTERTAG_PATTERN_TAG_TAG =
Regexp.new(">\\s+<", Regexp::MULTILINE | Regexp::IGNORECASE)
INTERTAG_PATTERN_TAG_CUSTOM =
Regexp.new(">\\s+%%%~", Regexp::MULTILINE | Regexp::IGNORECASE)
INTERTAG_PATTERN_CUSTOM_TAG =
Regexp.new("~%%%\\s+<", Regexp::MULTILINE | Regexp::IGNORECASE)
INTERTAG_PATTERN_CUSTOM_CUSTOM =
Regexp.new("~%%%\\s+%%%~", Regexp::MULTILINE | Regexp::IGNORECASE)
MULTISPACE_PATTERN =
Regexp.new("\\s+", Regexp::MULTILINE | Regexp::IGNORECASE)
TAG_END_SPACE_PATTERN =
Regexp.new("(<(?:[^>]+?))(?:\\s+?)(/?>)", Regexp::MULTILINE | Regexp::IGNORECASE)
TAG_LAST_UNQUOTED_VALUE_PATTERN =
Regexp.new("=\\s*[a-z0-9\\-_]+$", Regexp::IGNORECASE)
TAG_QUOTE_PATTERN =
Regexp.new("\\s*=\\s*([\"'])([a-z0-9\\-_]+?)\\1(/?)(?=[^<]*?>)", Regexp::IGNORECASE)
PRE_PATTERN =
Regexp.new("(<pre[^>]*?>)(.*?)(</pre>)", Regexp::MULTILINE | Regexp::IGNORECASE)
TA_PATTERN =
Regexp.new("(<textarea[^>]*?>)(.*?)(</textarea>)", Regexp::MULTILINE | Regexp::IGNORECASE)
SCRIPT_PATTERN =
Regexp.new("(<script[^>]*?>)(.*?)(</script>)", Regexp::MULTILINE | Regexp::IGNORECASE)
STYLE_PATTERN =
Regexp.new("(<style[^>]*?>)(.*?)(</style>)", Regexp::MULTILINE | Regexp::IGNORECASE)
TAG_PROPERTY_PATTERN =
Regexp.new("(\\s\\w+)\\s*=\\s*(?=[^<]*?>)", Regexp::IGNORECASE)
CDATA_PATTERN =
Regexp.new("\\s*<!\\[CDATA\\[(.*?)\\]\\]>\\s*", Regexp::MULTILINE | Regexp::IGNORECASE)
DOCTYPE_PATTERN =
Regexp.new("<!DOCTYPE[^>]*>", Regexp::MULTILINE | Regexp::IGNORECASE)
TYPE_ATTR_PATTERN =
Regexp.new("type\\s*=\\s*([\\\"']*)(.+?)\\1", Regexp::MULTILINE | Regexp::IGNORECASE)
JS_TYPE_ATTR_PATTERN =
Regexp.new("(<script[^>]*)type\\s*=\\s*([\"']*)(?:text|application)\/javascript\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
JS_LANG_ATTR_PATTERN =
Regexp.new("(<script[^>]*)language\\s*=\\s*([\"']*)javascript\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
STYLE_TYPE_ATTR_PATTERN =
Regexp.new("(<style[^>]*)type\\s*=\\s*([\"']*)text/(?:style|css)\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
Regexp.new("(<link[^>]*)type\\s*=\\s*([\"']*)text/(?:css|plain)\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
Regexp.new("<link(?:[^>]*)rel\\s*=\\s*([\"']*)(?:alternate\\s+)?stylesheet\\1(?:[^>]*)>", Regexp::MULTILINE | Regexp::IGNORECASE)
FORM_METHOD_ATTR_PATTERN =
Regexp.new("(<form[^>]*)method\\s*=\\s*([\"']*)get\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
INPUT_TYPE_ATTR_PATTERN =
Regexp.new("(<input[^>]*)type\\s*=\\s*([\"']*)text\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
BOOLEAN_ATTR_PATTERN =
Regexp.new("(<\\w+[^>]*[\"' ])(checked|selected|disabled|readonly)\\s*=\\s*([\"']*)\\w*\\3([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
EVENT_JS_PROTOCOL_PATTERN =
Regexp.new("^javascript:\\s*(.+)", Regexp::MULTILINE | Regexp::IGNORECASE)
HTTP_PROTOCOL_PATTERN =
Regexp.new("(<[^>]+?(?:href|src|cite|action)\\s*=\\s*['\"])http:(//[^>]+?>)", Regexp::MULTILINE | Regexp::IGNORECASE)
HTTPS_PROTOCOL_PATTERN =
Regexp.new("(<[^>]+?(?:href|src|cite|action)\\s*=\\s*['\"])https:(//[^>]+?>)", Regexp::MULTILINE | Regexp::IGNORECASE)
REL_EXTERNAL_PATTERN =
Regexp.new("<(?:[^>]*)rel\\s*=\\s*([\"']*)(?:alternate\\s+)?external\\1(?:[^>]*)>", Regexp::MULTILINE | Regexp::IGNORECASE)
EVENT_PATTERN1 =

unmasked: \son[a-z]+\s*=\s*"[^"\\\r\n]*(?:\\.[^"\\\r\n]*)*"

Regexp.new("(\\son[a-z]+\\s*=\\s*\")([^\"\\\\\\r\\n]*(?:\\\\.[^\"\\\\\\r\\n]*)*)(\")", Regexp::IGNORECASE)
EVENT_PATTERN2 =
Regexp.new("(\\son[a-z]+\\s*=\\s*')([^'\\\\\\r\\n]*(?:\\\\.[^'\\\\\\r\\n]*)*)(')", Regexp::IGNORECASE)
LINE_BREAK_PATTERN =
Regexp.new("(?:[[:blank:]]*(\\r?\\n)[[:blank:]]*)+")
SURROUNDING_SPACES_MIN_PATTERN =
Regexp.new("\\s*(</?(?:" + BLOCK_TAGS_MIN.gsub(",", "|") + ")(?:>|[\\s/][^>]*>))\\s*", Regexp::MULTILINE | Regexp::IGNORECASE)
SURROUNDING_SPACES_MAX_PATTERN =
Regexp.new("\\s*(</?(?:" + BLOCK_TAGS_MAX.gsub(",", "|") + ")(?:>|[\\s/][^>]*>))\\s*", Regexp::MULTILINE | Regexp::IGNORECASE)
SURROUNDING_SPACES_ALL_PATTERN =
Regexp.new("\\s*(<[^>]+>)\\s*", Regexp::MULTILINE | Regexp::IGNORECASE)
TEMP_COND_COMMENT_PATTERN =

patterns for searching for temporary replacements

Regexp.new("%%%~COMPRESS~COND~(\\d+?)~%%%")
TEMP_PRE_PATTERN =
Regexp.new("%%%~COMPRESS~PRE~(\\d+?)~%%%")
TEMP_TEXT_AREA_PATTERN =
Regexp.new("%%%~COMPRESS~TEXTAREA~(\\d+?)~%%%")
TEMP_SCRIPT_PATTERN =
Regexp.new("%%%~COMPRESS~SCRIPT~(\\d+?)~%%%")
TEMP_STYLE_PATTERN =
Regexp.new("%%%~COMPRESS~STYLE~(\\d+?)~%%%")
TEMP_EVENT_PATTERN =
Regexp.new("%%%~COMPRESS~EVENT~(\\d+?)~%%%")
TEMP_SKIP_PATTERN =
Regexp.new("%%%~COMPRESS~SKIP~(\\d+?)~%%%")
TEMP_LINE_BREAK_PATTERN =
Regexp.new("%%%~COMPRESS~LT~(\\d+?)~%%%")
# Constructor options for each supported JavaScript compressor, keyed by the
# compressor's symbolic name. #get_javascript_compressor passes the entry for
# the requested name to that compressor class's .new.
JAVASCRIPT_COMPRESSORS_OPTIONS =
{
  :closure => { :compilation_level => 'ADVANCED_OPTIMIZATIONS' },
  :yui => { :munge => true, :preserve_semicolons => true, :optimize => true, :line_break => nil }
}
# Constructor options for each supported CSS compressor, keyed by the
# compressor's symbolic name. #get_css_compressor passes the entry for the
# requested name to that compressor class's .new.
CSS_COMPRESSORS_OPTIONS =
{
  :yui => { :line_break => -1 }
}
# Baseline option values. #initialize merges the caller's options over this
# hash, so any key the caller omits keeps the value listed here.
DEFAULT_OPTIONS =
{
  # #compress returns its input untouched when this is false/nil
  :enabled => true,

  # default settings
  :remove_comments => true,
  :remove_multi_spaces => true,
  :remove_spaces_inside_tags => true,

  # optional settings
  :javascript_compressor => :yui,
  :css_compressor => :yui,
  :remove_intertag_spaces => false,
  # forced back to false by #initialize when :compress_js_templates is set
  :remove_quotes => false,
  :compress_javascript => false,
  :compress_css => false,
  :simple_doctype => false,
  :remove_script_attributes => false,
  :remove_style_attributes => false,
  :remove_link_attributes => false,
  :remove_form_attributes => false,
  :remove_input_attributes => false,
  :simple_boolean_attributes => false,
  :remove_javascript_protocol => false,
  :remove_http_protocol => false,
  :remove_https_protocol => false,
  :preserve_line_breaks => false,
  :remove_surrounding_spaces => nil,

  :preserve_patterns => nil
}

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Compressor

Returns a new instance of Compressor


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/htmlcompressor/compressor.rb', line 135

# Builds a compressor whose settings are DEFAULT_OPTIONS overridden by
# +options+, then probes for the optional external JS/CSS compressors.
#
# When :compress_js_templates is truthy, :remove_quotes is forced to false
# and :js_template_types is set to 'text/x-jquery-tmpl' followed by any
# extra type(s) supplied as the option value (a single value or a, possibly
# nested, array). Passing exactly +true+ adds no extra types. When the option
# is falsy, :js_template_types is an empty array.
#
# @param options [Hash] overrides for DEFAULT_OPTIONS
def initialize(options = {})
  @options = DEFAULT_OPTIONS.merge(options)
  template_setting = @options[:compress_js_templates]

  @options[:js_template_types] =
    if template_setting
      @options[:remove_quotes] = false

      # +true+ only switches the feature on; any other value names extra types.
      additional_types = template_setting.is_a?(TrueClass) ? [] : [template_setting].flatten
      ['text/x-jquery-tmpl'] + additional_types
    else
      []
    end

  detect_external_compressors
end

Instance Method Details

#compress(html) ⇒ Object


193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/htmlcompressor/compressor.rb', line 193

# Runs the compression pipeline over +html+ and returns the result.
#
# The input is returned unchanged when the :enabled option is falsy or when
# +html+ is nil or has zero length. Otherwise the blocks that must be
# preserved are pulled out, the remaining markup is processed, the extracted
# blocks are processed, and the blocks are put back.
#
# @param html the markup to compress
# @return the compressed markup, or +html+ itself when nothing was done
def compress(html)
  return html unless @options[:enabled]
  return html if html.nil? || html.length.zero?

  # One fresh container per kind of preserved block. The helpers receive them
  # positionally in this order: pre, textarea, script, style, event,
  # conditional comment, skip, line break, user.
  containers = Array.new(9) { [] }

  extracted = preserve_blocks(html, *containers)
  processed = process_html(extracted)
  process_preserved_blocks(*containers)
  return_blocks(processed, *containers)
end

#detect_external_compressorsObject


157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/htmlcompressor/compressor.rb', line 157

# Resets the compressor registries and fills them with whichever optional
# compressor libraries can be loaded. A library that fails to load with
# LoadError is skipped without raising.
def detect_external_compressors
  @javascript_compressors, @css_compressors = {}, {}

  # Closure Compiler: JavaScript only.
  begin
    require 'closure-compiler'
    @javascript_compressors.store(:closure, Closure::Compiler)
  rescue LoadError
    # library not available; :closure stays unregistered
  end

  # YUI: a single require provides both the JavaScript and CSS compressors.
  begin
    require 'yui/compressor'
    @javascript_compressors.store(:yui, YUI::JavaScriptCompressor)
    @css_compressors.store(:yui, YUI::CssCompressor)
  rescue LoadError
    # library not available; :yui stays unregistered
  end
end

#get_css_compressor(compressor_name) ⇒ Object


185
186
187
188
189
190
191
# File 'lib/htmlcompressor/compressor.rb', line 185

# Instantiates the CSS compressor registered under +compressor_name+,
# configured with the matching entry of CSS_COMPRESSORS_OPTIONS.
#
# The registry filled by #detect_external_compressors and the options table
# are keyed by Symbol, so a String name is converted to a Symbol before the
# lookup; "yui" and :yui therefore resolve to the same compressor.
#
# @param compressor_name [Symbol, String] name of the compressor, e.g. :yui
# @return a new compressor instance, or nil when no compressor is registered
#   under that name
def get_css_compressor(compressor_name)
  key = compressor_name.is_a?(String) ? compressor_name.to_sym : compressor_name
  return unless @css_compressors.key?(key)

  @css_compressors[key].new(CSS_COMPRESSORS_OPTIONS[key])
end

#get_javascript_compressor(compressor_name) ⇒ Object


177
178
179
180
181
182
183
# File 'lib/htmlcompressor/compressor.rb', line 177

# Instantiates the JavaScript compressor registered under +compressor_name+,
# configured with the matching entry of JAVASCRIPT_COMPRESSORS_OPTIONS.
#
# The registry filled by #detect_external_compressors and the options table
# are keyed by Symbol, while JS_COMPRESSOR_YUI and JS_COMPRESSOR_CLOSURE are
# Strings. A String name is therefore converted to a Symbol before the
# lookup, so "yui" and :yui resolve to the same compressor.
#
# @param compressor_name [Symbol, String] name of the compressor, e.g. :yui
#   or :closure
# @return a new compressor instance, or nil when no compressor is registered
#   under that name
def get_javascript_compressor(compressor_name)
  key = compressor_name.is_a?(String) ? compressor_name.to_sym : compressor_name
  return unless @javascript_compressors.key?(key)

  @javascript_compressors[key].new(JAVASCRIPT_COMPRESSORS_OPTIONS[key])
end