Module: BlueCloth::Transform

Defined in:
lib/bluecloth/transform.rb,
lib/bluecloth/transform/util.rb,
lib/bluecloth/transform/links.rb,
lib/bluecloth/transform/inline.rb,
lib/bluecloth/transform/blocks.rb

Defined Under Namespace

Classes: FormatError, RenderState

Constant Summary

TabWidth =

Tab width for #detab if none is specified

4
EmptyElementSuffix =

The tag-closing string -- set to '>' for HTML

"/>";
EscapeTable =

Table of MD5 sums for escaped characters

'\\`*_{}[]()#.!'.split(//).inject({}) do |table, char|
hash = Digest::MD5::hexdigest(char)

table[char] = {
		:md5 => hash,
	:md5re => Regexp::new(hash),
	:re  => Regexp::new('\\\\' + Regexp::escape(char)),
}
  table
end
HTMLCommentRegexp =

Matching constructs for tokenizing X/HTML

%r{ <! ( -- .*? -- \s* )+ > }mx
XMLProcInstRegexp =
%r{ <\? .*? \?> }mx
MetaTag =
Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )
HTMLTagOpenRegexp =
%r{ < [a-z/!$] [^<>]* }imx
HTMLTagCloseRegexp =
%r{ > }x
HTMLTagPart =
Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )
LinkRegex =

Link defs are in the form: ^[id]: url "optional title"

%r{
		^[ ]*\[(.+)\]:		# id = $1
 [ ]*
 \n?				# maybe *one* newline
 [ ]*
		<?(\S+?)>?				# url = $2
 [ ]*
 \n?				# maybe one newline
 [ ]*
		(?:
# Titles are delimited by "quotes" or (parens).
["(]
(.+?)			# title = $3
[")]			# Matching ) or "
[ ]*
		)?	# title is optional
		(?:\n+|\Z)
}x
AutoAnchorURLRegexp =
/<((https?|ftp):[^'">\s]+)>/
AutoAnchorEmailRegexp =
%r{
		<
		(
[-.\w]+
\@
[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
		)
		>
}xi
Encoders =

Encoder functions to turn characters of an email address into encoded entities.

[
	lambda {|char| "&#%03d;" % char},
	lambda {|char| "&#x%X;" % char},
	lambda {|char| char.chr },
]
RefLinkIdRegex =

Pattern to match the linkid part of an anchor tag for reference-style links.

%r{
		[ ]?					# Optional leading space
		(?:\n[ ]*)?				# Optional newline + spaces
		\[
(.*?)				# Id = $1
		\]
}x
InlineLinkRegex =
%r{
		\(						# Literal paren
[ ]*				# Zero or more spaces
<?(.+?)>?			# URI = $1
[ ]*				# Zero or more spaces
(?:					# 
	([\"\'])		# Opening quote char = $2
	(.*?)			# Title = $3
	\2				# Matching quote char
)?					# Title is optional
		\)
}x
BoldRegexps =

Pattern to match strong emphasis in Markdown text

[
 %r{ \b(\_\_) (\S|\S.*?\S) \1\b }x,
 %r{ (\*\*) (\S|\S.*?\S) \1 }x
]
ItalicRegexps =

Pattern to match normal emphasis in Markdown text

[
  %r{ (\*) (\S|\S.*?\S) \1 }x,
  %r{ \b(_) (\S|\S.*?\S) \1\b }x
]
InlineImageRegexp =

Next, handle inline images: ![alt text](url "optional title") Don't forget: encode * and _

%r{
		(					# Whole match = $1
!\[ (.*?) \]	# alt text = $2
 \([ ]*
<?(\S+?)>?		# source url = $3
   [ ]*
(?:				# 
  (["'])		# quote char = $4
  (.*?)			# title = $5
  \4			# matching quote
  [ ]*
)?				# title is optional
 \)
		)
}xs
ReferenceImageRegexp =

Reference-style images

%r{
		(					# Whole match = $1
!\[ (.*?) \]	# Alt text = $2
[ ]?			# Optional space
(?:\n[ ]*)?		# One optional newline + spaces
\[ (.*?) \]		# id = $3
		)
}xs
CodeEscapeRegexp =

Regexp to match special characters in a code block

%r{( \* | _ | \{ | \} | \[ | \] | \\ )}x
StrictBlockTags =

The list of tags which are considered block-level constructs and an alternation pattern suitable for use in regexps made from the list

%w[ p
StrictTagPattern =
StrictBlockTags.join('|')
LooseBlockTags =
StrictBlockTags - %w[ins
LooseTagPattern =
LooseBlockTags.join('|')
StrictBlockRegex =

Nested blocks: <div> <div> tags for inner block must be indented. </div> </div>

%r{
		^						# Start of line
		<(#{StrictTagPattern})	# Start tag: \2
		\b						# word break
		(.*\n)*?				# Any number of lines, minimal match
		</\1>					# Matching end tag
		[ ]*					# trailing spaces
		$						# End of line or document
}ix
LooseBlockRegex =

More-liberal block-matching

%r{
		^						# Start of line
		<(#{LooseTagPattern})	# start tag: \2
		\b						# word break
		(.*\n)*?				# Any number of lines, minimal match
		.*</\1>					# Anything + Matching end tag
		[ ]*					# trailing spaces
		$						# End of line or document
}ix
HruleBlockRegex =

Special case for <hr />.

%r{
		(						# $1
\A\n?				# Start of doc + optional \n
|					# or
.*\n\n				# anything + blank line
		)
		(						# save in $2
[ ]*				# Any spaces
<hr					# Tag open
\b					# Word break
([^<>])*?			# Attributes
/?>					# Tag close
$					# followed by a blank line or end of document
		)
}ix
ListMarkerOl =

Patterns to match and transform lists

%r{\d+\.}
ListMarkerUl =
%r{[*+-]}
ListMarkerAny =
Regexp::union( ListMarkerOl, ListMarkerUl )
ListRegexp =
%r{
 (?:
^[ ]{0,#{TabWidth - 1}}		# Indent < tab width
(#{ListMarkerAny})			# unordered or ordered ($1)
[ ]+						# At least one space
 )
 (?m:.+?)						# item content (include newlines)
 (?:
  \z						# Either EOF
|							#  or
  \n{2,}					# Blank line...
  (?=\S)					# ...followed by non-space
  (?![ ]*					# ...but not another item
	(#{ListMarkerAny})
   [ ]+)
 )
}x
ListItemRegexp =

Pattern for transforming list items

%r{
		(\n)?							# leading line = $1
		(^[ ]*)							# leading whitespace = $2
		(#{ListMarkerAny}) [ ]+			# list marker = $3
		((?m:.+?)						# list item text   = $4
		(\n{1,2}))
		(?= \n* (\z | \2 (#{ListMarkerAny}) [ ]+))
}x
CodeBlockRegexp =

Pattern for matching codeblocks

%r{
		(?:\n\n|\A)
		(									# $1 = the code block
 (?:
(?:[ ]{#{TabWidth}} | \t)		# a tab or tab-width of spaces
.*\n+
 )+
		)
		(^[ ]{0,#{TabWidth - 1}}\S|\Z)		# Lookahead for non-space at
								# line-start, or end of doc
}x
BlockQuoteRegexp =

Pattern for matching Markdown blockquote blocks

%r{
 (?:
^[ ]*>[ ]?		# '>' at the start of a line
  .+\n			# rest of the first line
(?:.+\n)*		# subsequent consecutive lines
\n*				# blanks
 )+
}x
PreChunk =
%r{ ( ^ \s* <pre> .+? </pre> ) }xm
SetextHeaderRegexp =

Regex for matching Setext-style headers

%r{
		(.+)			# The title text ($1)
		\n
		([\-=])+		# Match a line of = or -. Save only one in $2.
		[ ]*\n+
}x
AtxHeaderRegexp =

Regexp for matching ATX-style headers

%r{
		^(\#{1,6})	# $1 = string of #'s
		[ ]*
		(.+?)		# $2 = Header text
		[ ]*
		\#*			# optional closing #'s (not counted)
		\n+
}x

Instance Method Summary (collapse)

Instance Method Details

- (Object) apply_block_transforms(str, rs)

Do block-level transforms on a copy of str using the specified render state rs and return the results.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/bluecloth/transform/blocks.rb', line 5

# Run the block-level transforms over +str+ in a fixed order, threading the
# render state +rs+ through each one, and return the resulting text.
def apply_block_transforms( str, rs )
	# Port: This was called '_runBlockGamut' in the original

	@log.debug "Applying block transforms to:\n  %p" % str

	# Every stage takes (text, rs) and returns the text for the next stage.
	# Paragraph forming runs last, after HTML blocks have been hidden.
	stages = [
		:transform_headers,
		:transform_hrules,
		:transform_lists,
		:transform_code_blocks,
		:transform_block_quotes,
		:transform_auto_links,
		:hide_html_blocks,
		:form_paragraphs,
	]
	text = stages.inject( str ) {|current, stage| send(stage, current, rs) }

	@log.debug "Done with block transforms:\n  %p" % text
	return text
end

- (Object) apply_span_transforms(str, rs)

Apply Markdown span transforms to a copy of the specified str with the given render state rs and return it.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/bluecloth/transform/inline.rb', line 5

# Run the span-level (inline) transforms over +str+ with render state +rs+
# and return the result. Code spans are handled first and HTML encoding
# second; images, anchors and emphasis follow, and finally lines ending in
# two or more spaces become hard line breaks.
def apply_span_transforms( str, rs )
	@log.debug "Applying span transforms to:\n  %p" % str

	spans = transform_code_spans( str, rs )
	spans = encode_html( spans )
	[ :transform_images, :transform_anchors, :transform_italic_and_bold ].each do |step|
		spans = send( step, spans, rs )
	end

	# Hard breaks
	result = spans.gsub( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )

	@log.debug "Done with span transforms:\n  %p" % result
	return result
end

- (Object) detab(string, tabwidth = TabWidth)

Convert tabs to spaces



98
99
100
101
102
103
104
# File 'lib/bluecloth/transform.rb', line 98

# Expand every tab in +string+ into spaces, padding each one out to the next
# multiple of +tabwidth+ columns within its line. Lines are split on "\n"
# and re-joined, so trailing newlines are not preserved (String#split drops
# trailing empty fields).
def detab(string, tabwidth = TabWidth)
	expanded = string.split("\n").map do |line|
		line.gsub(/(.*?)\t/) do
			# $1 is the text since the previous tab stop on this line.
			lead = $1
			lead + ' ' * (tabwidth - lead.length % tabwidth)
		end
	end
	expanded.join("\n")
end

- (Object) encode_backslash_escapes(str)

Return a copy of the given str with any backslashed special character in it replaced with MD5 placeholders.



237
238
239
240
241
242
243
244
245
246
247
# File 'lib/bluecloth/transform/inline.rb', line 237

# Return a copy of +str+ in which each backslash-escaped special character
# (as listed in EscapeTable) is replaced by that character's MD5 placeholder.
def encode_backslash_escapes( str )
	# Doubled backslashes are encoded first, so an escaped backslash cannot be
	# read as the escape for whatever character follows it.
	encoded = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )

	EscapeTable.each do |char, entry|
		encoded.gsub!( entry[:re], entry[:md5] ) unless char == '\\'
	end

	encoded
end

- (Object) encode_code(str, rs)

Escape any characters special to HTML and encode any characters special to Markdown in a copy of the given str and return it.



187
188
189
190
191
192
# File 'lib/bluecloth/transform/inline.rb', line 187

# Return a copy of +str+ prepared for use inside a code element: &, < and >
# become HTML entities, then every character matched by CodeEscapeRegexp is
# swapped for its MD5 placeholder from EscapeTable. +rs+ is accepted but not
# used here.
def encode_code( str, rs )
	entities = [
		[ %r{&}, '&amp;' ],
		[ %r{<}, '&lt;' ],
		[ %r{>}, '&gt;' ],
	]
	# Ampersands must be handled first so the entities added afterwards are
	# not themselves re-encoded.
	html_safe = entities.inject( str ) {|text, (pattern, entity)| text.gsub(pattern, entity) }

	html_safe.gsub( CodeEscapeRegexp ) {|special| EscapeTable[special][:md5] }
end

- (Object) encode_email_address(addr)

Transform a copy of the given email addr into an escaped version safer for posting publicly.



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/bluecloth/transform/links.rb', line 69

# Build an obfuscated mailto: anchor for the email address +addr+.
#
# Each byte of "mailto:" + addr is emitted through one of the Encoders
# (decimal entity, hex entity, or the raw character), chosen at random:
# * the ':' separating the scheme is always left as a literal character, so
#   the trailing sub can strip the "mailto:" prefix for the link text;
# * the '@' is always entity-encoded (decimal or hex, never raw);
# * every other byte is left raw roughly 10% of the time and otherwise
#   entity-encoded.
#
# Returns the anchor HTML as a String. The output is randomized, so two calls
# with the same address generally differ.
def encode_email_address( addr )
	# String#each_byte yields Integers. Since Ruby 1.9 the character literals
	# ?: and ?@ are one-character Strings, so they must be converted to byte
	# values before they can match in the case statement below.
	colon, at_sign = ':'.ord, '@'.ord

	rval = ("mailto:" + addr).each_byte.map {|b|
		case b
		when colon
			":"
		when at_sign
			Encoders[ rand(2) ][ b ]
		else
			r = rand(100)
			if    r > 90 then Encoders[2][ b ]
			elsif r < 45 then Encoders[1][ b ]
			else              Encoders[0][ b ]
			end
		end
	}.join

	# The link text is the encoded address with everything up to and
	# including the literal ':' removed.
	return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
end

- (Object) encode_html(str)

Return a copy of str with bare ampersands and non-tag left angle brackets HTML-encoded.



92
93
94
95
# File 'lib/bluecloth/transform/util.rb', line 92

# Return a copy of +str+ with bare ampersands and stray left angle brackets
# HTML-encoded. An ampersand that already begins something entity-shaped
# (&name; / &#123; / &#x1F;) is left alone, as is a '<' followed by a
# letter, '/', '?', '$' or '!'. Right angle brackets are not touched.
def encode_html( str )
	amp_safe = str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" )
	amp_safe.gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
end

- (Object) escape_md(str)

Escape any markdown characters in a copy of the given str and return it.



5
6
7
8
9
# File 'lib/bluecloth/transform/util.rb', line 5

# Return a copy of +str+ with every '*' and '_' replaced by that character's
# MD5 placeholder from EscapeTable.
def escape_md( str )
	star_escaped = str.gsub( /\*/, EscapeTable['*'][:md5] )
	star_escaped.gsub( /_/,  EscapeTable['_'][:md5] )
end

- (Object) escape_special_chars(str)

Escape special characters in the given str



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/bluecloth/transform/inline.rb', line 195

# Return a copy of +str+ with Markdown-significant characters hidden behind
# MD5 placeholders. The text is tokenized with #tokenize_html; inside :tag
# tokens '*' and '_' are replaced, and :text tokens are run through
# #encode_backslash_escapes. Raises TypeError for any other token type.
def escape_special_chars( str )
	@log.debug "  Escaping special characters"
	text = ''

	# The original Markdown source has something called '$tags_to_skip'
	# declared here, but it's never used, so I don't define it.

	tokenize_html( str ) do |kind, chunk|
		@log.debug "   Adding %p token %p" % [ kind, chunk ]

		escaped =
			if kind == :tag
				# Within tags, encode * and _
				chunk.
					gsub( /\*/, EscapeTable['*'][:md5] ).
					gsub( /_/, EscapeTable['_'][:md5] )
			elsif kind == :text
				# Encode backslashed stuff in regular text
				encode_backslash_escapes( chunk )
			else
				raise TypeError, "Unknown token type %p" % kind
			end

		text += escaped
	end

	@log.debug "  Text with escapes is now: %p" % text
	return text
end

- (Object) form_paragraphs(str, rs)

Wrap all remaining paragraph-looking text in a copy of str inside <p> tags and return it.



306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# File 'lib/bluecloth/transform/blocks.rb', line 306

# Split +str+ on blank lines and return the pieces re-joined with "\n\n".
# A piece that is a key in rs.html_blocks is replaced by the stored HTML
# block; every other piece gets span transforms applied and is wrapped in
# <p>...</p> (leading spaces on its first line are dropped).
def form_paragraphs( str, rs )
	@log.debug " Forming paragraphs"

	# Leading and trailing newlines are trimmed before splitting.
	trimmed = str.sub( /\A\n+/, '' ).sub( /\n+\z/, '' )

	blocks = trimmed.split( /\n{2,}/ ).map do |graf|
		if rs.html_blocks.key?( graf )
			# Unhashify HTML blocks if this is a placeholder
			rs.html_blocks[ graf ]
		else
			# Otherwise, wrap in <p> tags
			spans = apply_span_transforms( graf, rs )
			spans.sub( /^[ ]*/, '<p>' ) + '</p>'
		end
	end
	rval = blocks.join( "\n\n" )

	@log.debug " Formed paragraphs: %p" % rval
	return rval
end

- (Object) hide_html_blocks(str, rs)

Replace all blocks of HTML in str that start in the left margin with tokens.



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/bluecloth/transform/blocks.rb', line 78

# Return a copy of +str+ in which each HTML block matched by
# StrictBlockRegex, LooseBlockRegex or HruleBlockRegex has been replaced by
# its MD5 hex digest, set off by blank lines. The original HTML is stored in
# rs.html_blocks under that digest.
def hide_html_blocks( str, rs )
	@log.debug "Hiding HTML blocks in %p" % str

	# Records one block in the render state and returns its placeholder.
	stash = lambda do |html|
		digest = Digest::MD5::hexdigest( html )
		rs.html_blocks[ digest ] = html
		@log.debug "Replacing %p with %p" % [ html, digest ]
		"\n\n#{digest}\n\n"
	end

	hidden = str.dup

	# The strict pattern runs before the loose one.
	passes = [
		[ "Finding blocks with the strict regex...", StrictBlockRegex ],
		[ "Finding blocks with the loose regex...", LooseBlockRegex ],
	]
	passes.each do |message, pattern|
		@log.debug message
		hidden.gsub!( pattern, &stash )
	end

	# For hrules only the tag itself ($2) is hidden; the text matched in
	# front of it ($1) is kept as-is.
	@log.debug "Finding hrules..."
	hidden.gsub!( HruleBlockRegex ) do
		prefix, rule = $1, $2
		prefix + stash[ rule ]
	end

	return hidden
end

- (Object) outdent(str)

Remove one level of line-leading tabs or spaces from a copy of str and return it.



99
100
101
# File 'lib/bluecloth/transform/util.rb', line 99

# Return a copy of +str+ with one level of indentation removed from the
# start of every line: either a single tab or between one and TabWidth
# spaces.
def outdent( str )
	one_indent = /^(\t|[ ]{1,#{TabWidth}})/
	str.gsub( one_indent, '' )
end

- (Object) strip_link_definitions(str, rs)

Strip link definitions from str, storing them in the given RenderState rs.



25
26
27
28
29
30
31
32
33
34
35
# File 'lib/bluecloth/transform/links.rb', line 25

# Return a copy of +str+ with every link definition matched by LinkRegex
# removed. For each definition the HTML-encoded URL is stored in rs.urls and,
# when a title is present, the title (with double quotes turned into &quot;)
# is stored in rs.titles. Both are keyed by the downcased link id.
def strip_link_definitions( str, rs )
	str.gsub( LinkRegex ) do
		id, url, title = $1, $2, $3
		key = id.downcase

		rs.urls[ key ] = encode_html( url )
		rs.titles[ key ] = title.gsub( /"/, "&quot;" ) if title

		# The definition itself leaves nothing behind in the text.
		""
	end
end

- (Object) to_html(lite = false)

Render Markdown-formatted text in this string object as HTML and return it. The parameter is for compatibility with RedCloth, and is currently unused, though that may change in the future.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/bluecloth/transform.rb', line 9

# Render the Markdown held in @content as HTML and return it.
#
# The steps run in this order: normalize line endings and tabs, optionally
# neutralize angle brackets (when filter_html is true), blank out
# whitespace-only lines, hide raw HTML blocks, strip link definitions,
# escape special characters, apply the block transforms, and finally swap
# the escaped characters back in. +lite+ is accepted but not used.
def to_html(lite = false)
	# Create a StringScanner we can reuse for various lexing tasks
	@scanner = StringScanner::new( '' )

	# Make a structure to carry around stuff that gets placeholdered out of
	# the source.
	rs = RenderState::new( {}, {}, {} )

	# Normalized line endings, tabs turned to spaces, and a couple of
	# guaranteed newlines at the end
	unix_text = @content.gsub(/\r\n?/, "\n")
	text = detab( unix_text ) + "\n\n"
	@log.debug "Normalized line-endings: %p" % text

	# Filter HTML if we're asked to do so
	if filter_html
		{ "<" => "&lt;", ">" => "&gt;" }.each do |bracket, entity|
			text.gsub!( bracket, entity )
		end
		@log.debug "Filtered HTML: %p" % text
	end

	# Simplify blank lines
	text.gsub!(/^ +$/, '')
	@log.debug "Tabs -> spaces/blank lines stripped: %p" % text

	# Replace HTML blocks with placeholders
	text = hide_html_blocks( text, rs )
	@log.debug "Hid HTML blocks: %p" % text
	@log.debug "Render state: %p" % rs

	# Strip link definitions, store in render state
	text = strip_link_definitions( text, rs )
	@log.debug "Stripped link definitions: %p" % text
	@log.debug "Render state: %p" % rs

	# Escape meta-characters
	text = escape_special_chars( text )
	@log.debug "Escaped special characters: %p" % text

	# Transform block-level constructs
	text = apply_block_transforms( text, rs )
	@log.debug "After block-level transforms: %p" % text

	# Now swap back in all the escaped characters
	text = unescape_special_chars( text )
	@log.debug "After unescaping special characters: %p" % text

	return text
end

- (Object) tokenize_html(str)

Break the HTML source in str into a series of tokens and return them. The tokens are just 2-element Array tuples with a type and the actual content. If this function is called with a block, the type and text parts of each token will be yielded to it one at a time as they are extracted.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/bluecloth/transform/util.rb', line 25

# Split +str+ into a list of [type, token] pairs, where type is :tag (HTML
# comments, processing instructions and tags) or :text (everything else).
# If a block is given, each type and token is also yielded as it is found.
#
# Uses the shared @scanner, whose string is replaced with a copy of +str+.
# Raises a RuntimeError ("Malformed tag ...") when a tag is opened but the
# input ends before its closing '>' is found.
#
# Returns the Array of [type, token] pairs.
def tokenize_html( str )
	depth = 0
	tokens = []
	@scanner.string = str.dup
	type, token = nil, nil

	until @scanner.eos?
		@log.debug "Scanning from %p" % @scanner.rest

		# Match comments and PIs without nesting
		if (( token = @scanner.scan(MetaTag) ))
			type = :tag

		# Do nested matching for HTML tags
		elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
			# NOTE: the scan above has already advanced the scanner, so this
			# is the offset just past the opening match, not the offset of
			# the '<' itself.
			tagstart = @scanner.pos
			@log.debug " Found the start of a plain tag at %d" % tagstart

			# Start the token with the opening angle
			depth = 1
			type = :tag

			# Scan the rest of the tag, allowing unlimited nested <>s. If
			# the scanner runs out of text before the tag is closed, raise
			# an error.
			while depth.nonzero?

				# Scan either an opener or a closer
				chunk = @scanner.scan( HTMLTagPart ) or
					raise "Malformed tag at character %d: %p" % 
						[ tagstart, token + @scanner.rest ]
					
				@log.debug "  Found another part of the tag at depth %d: %p" % [ depth, chunk ]

				token += chunk

				# If the last character of the token so far is a closing
				# angle bracket, decrement the depth. Otherwise increment
				# it for a nested tag.
				depth += ( token[-1, 1] == '>' ? -1 : 1 )
				@log.debug "  Depth is now #{depth}"
			end

		# Match text segments
		else
			@log.debug " Looking for a chunk of text"
			type = :text

			# Scan forward, always matching at least one character to move
			# the pointer beyond any non-tag '<'.
			token = @scanner.scan_until( /[^<]+/m )
		end

		@log.debug " type: %p, token: %p" % [ type, token ]

		# If a block is given, feed it one token at a time. Add the token to
		# the token list to be returned regardless.
		if block_given?
			yield( type, token )
		end
		tokens << [ type, token ]
	end

	return tokens
end

- (Object) transform_anchors(str, rs)

Apply Markdown anchor transforms to a copy of the specified str with the given render state rs and return it.



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/bluecloth/transform/links.rb', line 116

# Return a copy of +str+ with Markdown links turned into <a> tags.
#
# The text is scanned for '[' ... ']' link text (nested brackets allowed),
# which must be followed by one of:
# * a reference part matching RefLinkIdRegex -- the downcased id (or the link
#   text itself when the id is empty) is looked up in rs.urls / rs.titles; if
#   the id is unknown the original source text is kept;
# * an inline part matching InlineLinkRegex -- "(url "title")".
# Bracketed text followed by neither is copied through unchanged. URLs and
# titles are passed through #escape_md before being placed in attributes.
#
# Uses the shared @scanner, whose string is replaced with a copy of +str+.
def transform_anchors( str, rs )
	@log.debug " Transforming anchors"
	@scanner.string = str.dup
	text = ''

	# Scan the whole string
	until @scanner.eos?
	
		if @scanner.scan( /\[/ )
			link = ''; linkid = ''
			depth = 1
			# Offset just past the opening '['; startpos-1 is the '[' itself.
			startpos = @scanner.pos
			@log.debug " Found a bracket-open at %d" % startpos

			# Scan the rest of the tag, allowing unlimited nested []s. If
			# the scanner runs out of text before the opening bracket is
			# closed, append the text and return (wasn't a valid anchor).
			while depth.nonzero?
				linktext = @scanner.scan_until( /\]|\[/ )

				if linktext
					@log.debug "  Found a bracket at depth %d: %p" % [ depth, linktext ]
					link += linktext

					# Decrement depth for each closing bracket
					depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
					@log.debug "  Depth is now #{depth}"

				# If there's no more brackets, it must not be an anchor, so
				# just abort.
				else
					@log.debug "  Missing closing brace, assuming non-link."
					link += @scanner.rest
					@scanner.terminate
					return text + '[' + link
				end
			end
			link.slice!( -1 ) # Trim final ']'
			@log.debug " Found leading link %p" % link

			# Look for a reference-style second part
			if @scanner.scan( RefLinkIdRegex )
				linkid = @scanner[1]
				# An empty id ("[text][]") means the link text is the id.
				linkid = link.dup if linkid.empty?
				linkid.downcase!
				@log.debug "  Found a linkid: %p" % linkid

				# If there's a matching link in the link table, build an
				# anchor tag for it.
				if rs.urls.key?( linkid )
					@log.debug "   Found link key in the link table: %p" % rs.urls[linkid]
					url = escape_md( rs.urls[linkid] )

					text += %{<a href="#{url}"}
					if rs.titles.key?(linkid)
						text += %{ title="%s"} % escape_md( rs.titles[linkid] )
					end
					text += %{>#{link}</a>}

				# If the link referred to doesn't exist, just append the raw
				# source to the result
				else
					@log.debug "  Linkid %p not found in link table" % linkid
					@log.debug "  Appending original string instead: "
					@log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ]
					text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
				end

			# ...or for an inline style second part
			elsif @scanner.scan( InlineLinkRegex )
				url = @scanner[1]
				title = @scanner[3]
				@log.debug "  Found an inline link to %p" % url

				text += %{<a href="%s"} % escape_md( url )
				if title
					title.gsub!( /"/, "&quot;" )
					text += %{ title="%s"} % escape_md( title )
				end
				text += %{>#{link}</a>}

			# No linkid part: just append the first part as-is.
			else
				@log.debug "No linkid, so no anchor. Appending literal text."
				text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
			end # if linkid

		# Plain text
		else
			@log.debug " Scanning to the next link from %p" % @scanner.rest
			text += @scanner.scan( /[^\[]+/ )
		end

	end # until @scanner.empty?

	return text
end

- (Object) transform_auto_links(str, rs)

Transform URLs in a copy of the specified str into links and return it.



50
51
52
53
54
55
56
# File 'lib/bluecloth/transform/links.rb', line 50

# Return a copy of +str+ with <url> autolinks (AutoAnchorURLRegexp) turned
# into anchors, and <address> email autolinks (AutoAnchorEmailRegexp) turned
# into anchors built by #encode_email_address. +rs+ is accepted but not used
# here.
def transform_auto_links( str, rs )
	@log.debug " Transforming auto-links"

	with_urls = str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>} )

	# The address is unescaped before it is handed to the encoder.
	with_urls.gsub( AutoAnchorEmailRegexp ) do
		encode_email_address( unescape_special_chars($1) )
	end
end

- (Object) transform_block_quotes(str, rs)

Transform Markdown-style blockquotes in a copy of the specified str and return it.



229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/bluecloth/transform/blocks.rb', line 229

# Return a copy of +str+ with each Markdown blockquote (BlockQuoteRegexp)
# converted to a <blockquote> element. One level of '>' quoting is removed,
# the contents go back through #apply_block_transforms (so quotes can nest),
# and the result is indented by TabWidth spaces -- except for <pre> chunks,
# whose added indentation is taken off again.
def transform_block_quotes( str, rs )
	@log.debug " Transforming block quotes"
	indent = " " * TabWidth

	str.gsub( BlockQuoteRegexp ) do |quote|
		@log.debug "Making blockquote from %p" % quote

		quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting 
		quote.gsub!( /^ +$/, '' )	# Trim whitespace-only lines

		inner = apply_block_transforms( quote, rs ).gsub( /^/, indent )
		inner = inner.gsub( PreChunk ) {|pre| pre.gsub(/^#{indent}/o, '') }

		quoted = %{<blockquote>\n%s\n</blockquote>\n\n} % inner
		@log.debug "Blockquoted chunk is: %p" % quoted
		quoted
	end
end

- (Object) transform_code_blocks(str, rs)

Transform Markdown-style codeblocks in a copy of the specified str and return it.



203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/bluecloth/transform/blocks.rb', line 203

# Return a copy of +str+ with each indented code block (CodeBlockRegexp)
# converted to <pre><code>...</code></pre>. The block is outdented one
# level, encoded with #encode_code and stripped of trailing whitespace.
def transform_code_blocks( str, rs )
	@log.debug " Transforming code blocks"

	str.gsub( CodeBlockRegexp ) do
		# $2 is whatever the pattern consumed after the block (the start of
		# the following line, or nothing at end of input); it is put back
		# after the generated markup.
		codeblock, remainder = $1, $2
		body = encode_code( outdent(codeblock), rs ).rstrip

		# Generate the codeblock
		%{\n\n<pre><code>%s\n</code></pre>\n\n%s} % [ body, remainder ]
	end
end

- (Object) transform_code_spans(str, rs)

Transform backticked spans into <code> spans.



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/bluecloth/transform/inline.rb', line 46

# Return +str+ with backtick-delimited spans converted to <code> elements.
#
# A span opens with a run of one or more backticks and closes at the next
# occurrence of that same run; the enclosed text is stripped of surrounding
# whitespace and passed through #encode_code.
#
# When +str+ contains no backtick at all, +str+ itself (not a copy) is
# returned. Raises FormatError when an opening backtick run has no matching
# closer before the end of the string.
#
# Uses the shared @scanner, whose string is replaced with a copy of +str+.
def transform_code_spans( str, rs )
	@log.debug " Transforming code spans"

	# Set up the string scanner and just return the string unless there's at
	# least one backtick.
	@scanner.string = str.dup
	unless @scanner.exist?( /`/ )
		@scanner.terminate
		@log.debug "No backticks found for code span in %p" % str
		return str
	end

	@log.debug "Transforming code spans in %p" % str

	# Build the transformed text anew
	text = ''

	# Scan to the end of the string
	until @scanner.eos?

		# Scan up to an opening backtick
		if pre = @scanner.scan_until( /.?(?=`)/m )
			text += pre
			@log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]

			# Make a pattern to find the end of the span
			opener = @scanner.scan( /`+/ )
			len = opener.length
			closer = Regexp::new( opener )
			@log.debug "Scanning for end of code span with %p" % closer

			# Scan until the end of the closing backtick sequence. Chop the
			# backticks off the resultant string, strip leading and trailing
			# whitespace, and encode any entities contained in it.
			codespan = @scanner.scan_until( closer ) or
				raise FormatError::new( @scanner.rest[0,20],
					"No %p found before end" % opener )

			@log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]
			codespan.slice!( -len, len )
			text += "<code>%s</code>" %
				encode_code( codespan.strip, rs )

		# If there's no more backticks, just append the rest of the string
		# and move the scan pointer to the end
		else
			text += @scanner.rest
			@scanner.terminate
		end
	end

	return text
end

- (Object) transform_headers(str, rs)

Apply Markdown header transforms to a copy of the given str and render state rs and return the result.



268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# File 'lib/bluecloth/transform/blocks.rb', line 268

# Return a copy of +str+ with Markdown headers converted to <h1>..<h6>
# elements. Setext-style headers (a title line underlined with '=' or '-')
# are handled first, then ATX-style headers (one to six leading '#').
# Header titles are run through #apply_span_transforms.
def transform_headers( str, rs )
	@log.debug " Transforming headers"

	# Setext-style headers:
	#	  Header 1
	#	  ========
	#
	#	  Header 2
	#	  --------
	#
	setext_done = str.gsub( SetextHeaderRegexp ) do
		@log.debug "Found setext-style header"
		title, hdrchar = $1, $2
		title = apply_span_transforms( title, rs )

		if hdrchar == '='
			%[<h1>#{title}</h1>\n\n]
		elsif hdrchar == '-'
			%[<h2>#{title}</h2>\n\n]
		else
			title
		end
	end

	# ATX-style headers: the number of leading '#' gives the header level.
	setext_done.gsub( AtxHeaderRegexp ) do
		@log.debug "Found ATX-style header"
		hdrchars, title = $1, $2
		title = apply_span_transforms( title, rs )

		level = hdrchars.length
		%{<h%d>%s</h%d>\n\n} % [ level, title, level ]
	end
end

- (Object) transform_hrules(str, rs)

Transform any Markdown-style horizontal rules in a copy of the specified str and return it.



105
106
107
108
# File 'lib/bluecloth/transform/blocks.rb', line 105

# Return a copy of +str+ in which every line made up of three or more '-',
# '*' or '_' characters (each optionally surrounded by a single space) is
# replaced by an <hr> tag on its own line. +rs+ is accepted but not used.
def transform_hrules( str, rs )
	@log.debug " Transforming horizontal rules"

	rule_tag = "\n<hr#{EmptyElementSuffix}\n"
	str.gsub( /^( ?[\-\*_] ?){3,}$/, rule_tag )
end

- (Object) transform_images(str, rs)

Turn image markup into image tags.



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/bluecloth/transform/inline.rb', line 129

# Return a copy of +str+ with Markdown image markup converted to <img> tags.
#
# Reference-style images (![alt][id]) are resolved first against rs.urls and
# rs.titles using the downcased id, or the downcased alt text when the id is
# empty; a reference with an unknown id is left as written. Inline images
# (![alt](url "title")) are converted afterwards. Double quotes in alt text
# and inline titles become &quot;, and URLs and titles go through
# #escape_md.
def transform_images( str, rs )
	@log.debug " Transforming images" # % str

	# Handle reference-style labeled images: ![alt text][id]
	referenced = str.gsub( ReferenceImageRegexp ) do |match|
		whole, alt, linkid = $1, $2, $3.downcase
		@log.debug "Matched %p" % match
		alt.gsub!( /"/, '&quot;' )

		# for shortcut links like ![this][].
		linkid = alt.downcase if linkid.empty?

		result =
			if rs.urls.key?( linkid )
				url = escape_md( rs.urls[linkid] )
				@log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]

				# Build the tag
				tag = %{<img src="%s" alt="%s"} % [ url, alt ]
				if rs.titles.key?( linkid )
					tag += %{ title="%s"} % escape_md( rs.titles[linkid] )
				end
				tag + EmptyElementSuffix
			else
				whole
			end

		@log.debug "Replacing %p with %p" % [ match, result ]
		result
	end

	# Inline image style
	referenced.gsub( InlineImageRegexp ) do |match|
		@log.debug "Found inline image %p" % match
		whole, alt, title = $1, $2, $5
		url = escape_md( $3 )
		alt.gsub!( /"/, '&quot;' )

		# Build the tag
		tag = %{<img src="%s" alt="%s"} % [ url, alt ]
		if title
			title.gsub!( /"/, '&quot;' )
			tag += %{ title="%s"} % escape_md( title )
		end
		tag += EmptyElementSuffix

		@log.debug "Replacing %p with %p" % [ match, tag ]
		tag
	end
end

- (Object) transform_italic_and_bold(str, rs)

Transform italic- and bold-encoded text in a copy of the specified str and return it.



35
36
37
38
39
40
41
42
43
# File 'lib/bluecloth/transform/inline.rb', line 35

# Return a copy of +str+ with strong emphasis wrapped in <strong> and normal
# emphasis wrapped in <em>. Both BoldRegexps patterns are applied before the
# ItalicRegexps patterns. +rs+ is accepted but not used.
def transform_italic_and_bold( str, rs )
	@log.debug " Transforming italic and bold"

	strong = %{<strong>\\2</strong>}
	em = %{<em>\\2</em>}
	passes = [
		[ BoldRegexps[0], strong ],
		[ BoldRegexps[1], strong ],
		[ ItalicRegexps[0], em ],
		[ ItalicRegexps[1], em ],
	]

	passes.inject( str ) {|text, (pattern, markup)| text.gsub(pattern, markup) }
end

- (Object) transform_list_items(str, rs)

Transform list items in a copy of the given str and return it.



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/bluecloth/transform/blocks.rb', line 164

# Return a copy of +str+ (the source of one list) with each item matched by
# ListItemRegexp wrapped in <li>...</li>.
#
# An item preceded by a blank line, or containing one, is outdented and
# given the full block transforms. Any other item is outdented, checked for
# nested lists, chomped, and given span transforms only.
def transform_list_items( str, rs )
	@log.debug " Transforming list items"

	# Trim trailing blank lines
	trimmed = str.sub( /\n{2,}\z/, "\n" )

	trimmed.gsub( ListItemRegexp ) do |line|
		@log.debug "  Found item line %p" % line
		leading_line, item = $1, $4

		body =
			if leading_line || /\n{2,}/.match( item )
				@log.debug "   Found leading line or item has a blank"
				apply_block_transforms( outdent(item), rs )
			else
				# Recursion for sub-lists
				@log.debug "   Recursing for sublist"
				nested = transform_lists( outdent(item), rs ).chomp
				apply_span_transforms( nested, rs )
			end

		%{<li>%s</li>\n} % body
	end
end

- (Object) transform_lists(str, rs)

Transform Markdown-style lists in a copy of the specified str and return it.



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/bluecloth/transform/blocks.rb', line 135

# Return a copy of +str+ with each Markdown list (ListRegexp) converted to a
# <ul> or <ol> element. The list type comes from the first item's marker,
# and the items are rendered by #transform_list_items.
def transform_lists( str, rs )
	@log.debug " Transforming lists at %p" % (str[0,100] + '...')

	str.gsub( ListRegexp ) do |list|
		@log.debug "  Found list %p" % list

		# Read the marker capture before running any other match.
		bullet = $1
		tag = ListMarkerUl.match( bullet ) ? "ul" : "ol"

		# Every run of two or more newlines in the list becomes exactly three.
		list.gsub!( /\n{2,}/, "\n\n\n" )
		items = transform_list_items( list, rs )

		%{<%s>\n%s</%s>\n} % [ tag, items, tag ]
	end
end

- (Object) unescape_special_chars(str)

Swap escaped special characters in a copy of the given str and return it.



226
227
228
229
230
231
232
233
# File 'lib/bluecloth/transform/inline.rb', line 226

# Return a copy of +str+ in which every MD5 placeholder listed in EscapeTable
# has been swapped back for the character it stands for.
#
# The argument itself is left untouched, so frozen strings and strings the
# caller still holds a reference to are safe to pass in.
def unescape_special_chars( str )
	EscapeTable.inject( str.dup ) do |text, (char, hash)|
		@log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ]

		# Block form inserts +char+ literally; a replacement *string* would
		# give a backslash special meaning.
		text.gsub( hash[:md5re] ) { char }
	end
end