Module: MaRuKu::Strings

Included in:
In::Markdown::BlockLevelParser, In::Markdown::BlockLevelParser::LineSource, In::Markdown::SpanLevelParser::CharSourceManual, In::Markdown::SpanLevelParser::CharSourceStrscan, In::Markdown::SpanLevelParser::HTMLHelper, In::Markdown::SpanLevelParser::SpanContext, MDElement, String
Defined in:
lib/maruku.rb,
lib/maruku/string_utils.rb,
lib/maruku/input/type_detection.rb

Overview

These are string utilities.

Constant Summary

TabSize =
4
AttributeDefinitionList =

$1 = id $2 = attribute list

/^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
InlineAttributeList =
/^\s{0,3}\{([:#\.].*)\}\s*$/
Definition =

Example:

^:blah blah
^: blah blah
^   : blah blah
%r{ 
	^ # begin of line
	[ ]{0,3} # up to 3 spaces
	: # colon
	\s* # whitespace
	(\S.*) # the text    = $1
	$ # end of line
}x
Abbreviation =

Example:

*[HTML]: Hyper Text Markup Language
%r{
	^  # begin of line
	[ ]{0,3} # up to 3 spaces
	\* # one asterisk
	\[ # opening bracket
	([^\]]+) # any non-closing bracket:  id = $1
	\] # closing bracket
	:  # colon
	\s* # whitespace
	(\S.*\S)* #           definition=$2
	\s* # strip this whitespace
	$   # end of line
}x
FootnoteText =
%r{
	^  # begin of line
	[ ]{0,3} # up to 3 spaces
	\[(\^.+)\]: # id = $1 (including '^')
	\s*(\S.*)?$    # text = $2 (not obb.)
}x
LinkRegex =

This regex is taken from the BlueCloth sources. Link definitions are in the form: ^[id]: n? url "optional title"

%r{
		^[ ]{0,3}\[([^\[\]]+)\]:		# id = $1
 [ ]*
		<?([^>\s]+)>?				# url = $2
 [ ]*
		(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?)			# title = $3
[")']			# Matching ) or "
\s*(.+)?   # stuff = $4
		)?	# title is optional
}x
%r{^[ ]{0,3}\[([^\[\]]+)\]:\s*$}
HeaderWithId =
/^(.*)\{\#([\w_-]+)\}\s*$/
HeaderWithAttributes =
/^(.*)\{(.*)\}\s*$/
MightBeTableHeader =

If the line contains a pipe, it could be a table header.

%r{\|}
Sep =

-------------:

/\s*(\:)?\s*-+\s*(\:)?\s*/
TableSeparator =

| -------------:| ------------------------------ |

%r{^(\|?#{Sep}\|?)+\s*$}
EMailAddress =
/<([^:]+@[^:]+)>/

Instance Method Summary (collapse)

Instance Method Details

- (Object) add_tabs(s, n = 1, char = "\t")



25
26
27
# File 'lib/maruku/string_utils.rb', line 25

# Prefixes every line of +s+ with +char+ repeated +n+ times.
#
# s    - the text to indent; it is split on "\n"
# n    - how many copies of +char+ to prepend (default 1)
# char - the indentation string (default: one tab)
#
# Returns the indented lines joined back together with "\n".
def add_tabs(s, n = 1, char = "\t")
	indented = s.split("\n").collect do |line|
		(char * n) + line
	end
	indented.join("\n")
end

- (Object) dbg_describe_ary(a, prefix = '')



189
190
191
192
193
194
# File 'lib/maruku/string_utils.rb', line 189

# Debugging aid: prints each element of +a+ on its own line, preceded by
# +prefix+ and a 1-based counter, using the element's #inspect form.
#
# a      - the collection to dump
# prefix - text written at the start of every line (default: empty)
#
# Returns +a+.
def dbg_describe_ary(a, prefix = '')
	a.each_with_index do |item, idx|
		puts "#{prefix} (#{idx + 1})# #{item.inspect}"
	end
end

- (Boolean) force_linebreak?(l)

Returns:

  • (Boolean)


196
197
198
# File 'lib/maruku/string_utils.rb', line 196

# Tells whether +l+ has two spaces immediately before an end of line.
#
# Returns the index at which the two spaces start (truthy), or nil when
# there is no such match.
def force_linebreak?(l)
	/  $/ =~ l
end

- (Object) line_md_type(l)



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/maruku/input/type_detection.rb', line 36

# Classifies one line of Markdown source.
#
# The tests below run top to bottom and the first one that matches decides
# the result, so their order is significant.
#
# l - the line to classify
#
# Returns one of the symbols :text, :code, :empty, :footnote_text,
# :ref_definition, :abbreviation, :definition, :xml_instr, :raw_html,
# :ulist, :olist, :header1, :header2, :header3, :hrule, :quote, :metadata,
# :ald or :ial. :text is the fallback when nothing else matches.
def line_md_type(l)
	# The order of evaluation is important (:text is a catch-all)
	return :text   if l =~ /^[a-zA-Z]/
	return :code             if number_of_leading_spaces(l)>=4
	return :empty    if l =~ /^\s*$/
	return :footnote_text    if l =~ FootnoteText
	return :ref_definition   if l =~ LinkRegex or l=~ IncompleteLink
	return :abbreviation     if l =~ Abbreviation
	return :definition       if l =~ Definition
	# I had a bug with emails and urls at the beginning of the 
	# line that were mistaken for raw_html
	return :text if l=~ /^[ ]{0,3}#{EMailAddress}/
	# Both http and https autolinks must be caught here; otherwise
	# "<https" would satisfy the raw_html test below.
	return :text if l=~ /^[ ]{0,3}<https?:/
	# raw html is like PHP Markdown Extra: at most three spaces before
	return :xml_instr if l =~ %r{^\s*<\?}
	return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
	return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?<\!\-\-}
	# Something is wrong with how we parse lists! :-(
	#return :ulist    if l =~ /^[ ]{0,3}([\*\-\+])\s+.*\w+/
	#return :olist    if l =~ /^[ ]{0,3}\d+\..*\w+/
	return :ulist    if l =~ /^[ ]{0,1}([\*\-\+])\s+.*\w+/
	return :olist    if l =~ /^[ ]{0,1}\d+\..*\w+/
	return :header1  if l =~ /^(=)+/ 
	return :header2  if l =~ /^([-\s])+$/ 
	return :header3  if l =~ /^(#)+\s*\S+/ 
	# at least three asterisks on a line, and only whitespace
	return :hrule    if l =~ /^(\s*\*\s*){3,1000}$/ 
	return :hrule    if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
	return :hrule    if l =~ /^(\s*_\s*){3,1000}$/ # or underscores	
	return :quote    if l =~ /^>/
	return :metadata if l =~ /^@/
#		if @@new_meta_data?
		return :ald   if l =~ AttributeDefinitionList
		return :ial   if l =~ InlineAttributeList
#		end
#		return :equation_end if l =~ EquationEnd
	return :text # else, it's just text
end

- (Object) normalize_key_and_value(k, v)

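The key is stripped and downcased, and its spaces become underscores; it is returned as a string. The value is stripped; "yes"/"true" become true, "no"/"false" become false, and a missing value defaults to true.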



69
70
71
72
73
74
75
76
77
78
79
# File 'lib/maruku/string_utils.rb', line 69

# Normalizes one key/value pair.
#
# k - the raw key; it is stripped, downcased and its spaces are turned
#     into underscores
# v - the raw value, or nil/false when absent; a present value is stripped,
#     an absent one becomes true
#
# The words yes/true and no/false (in any letter case) are mapped to the
# booleans true and false.
#
# Returns a two-element array [key, value]; the key is a String.
def normalize_key_and_value(k, v)
	value = v ? v.strip : true # no value defaults to true

	# check synonyms
	case value.to_s.downcase
	when 'yes', 'true' then value = true
	when 'no', 'false' then value = false
	end

	key = k.strip.downcase.gsub(' ', '_')
	[key, value]
end

- (Object) num_leading_hashes(s)

Counts the number of leading '#' in the string



147
148
149
150
151
# File 'lib/maruku/string_utils.rb', line 147

# Counts the '#' characters at the start of +s+.
#
# The final character of +s+ is never counted, so a string made only of
# hashes yields its length minus one (and "" or "#" yield 0).
#
# Returns the count as an Integer.
def num_leading_hashes(s)
	# Drop the last character, then measure the run of leading hashes.
	s[0...-1][/\A#*/].size
end

- (Object) number_of_leading_spaces(s)

Returns the number of leading spaces, considering that a tab counts as `TabSize` spaces.



83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/maruku/string_utils.rb', line 83

# Measures the indentation at the start of +s+.
#
# Each leading space counts as 1 and each leading tab counts as TabSize;
# counting stops at the first character that is neither.
#
# Returns the total as an Integer.
def number_of_leading_spaces(s)
	width = 0
	s.each_char do |ch|
		case ch
		when ' '  then width += 1
		when "\t" then width += TabSize
		else break
		end
	end
	width
end

- (Object) parse_email_headers(s)

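This parses email-style headers at the beginning of the string. Returns a hash.

hash[:data] is the message (the text that follows the headers, or the whole string when there are no headers).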

Keys are downcased, space becomes underscore, converted to symbols.

My key: true

becomes:

{:my_key => true}


47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/maruku/string_utils.rb', line 47

# Splits a leading block of "Key: value" header lines off +s+.
#
# The header block must start at the very beginning of +s+ and be followed
# by a blank line. Each header line is split at its first ':' only, so the
# value may itself contain colons. Key and value are passed through
# normalize_key_and_value and the key is then turned into a symbol.
#
# Returns a hash. :data holds the text after the header block, or all of
# +s+ when no header block is found; every header adds one more entry.
def parse_email_headers(s)
	header_block = /\A((\w[\w\s\_\-]+: .*\n)+)\s*\n/.match(s)
	return { :data => s } unless header_block

	result = { :data => header_block.post_match }
	header_block[1].split("\n").each do |line|
		# Limit of 2: only the first ':' separates key from value.
		raw_key, raw_value = line.split(':', 2)
		key, value = normalize_key_and_value(raw_key, raw_value)
		result[key.to_sym] = value
	end
	result
end

- (Object) sanitize_ref_id(x)

change space to "_" and remove any non-word character



162
163
164
# File 'lib/maruku/string_utils.rb', line 162

# Builds a normalized reference id from +x+.
#
# The text is stripped and downcased, spaces become underscores, and every
# remaining non-word character is removed.
#
# Returns the sanitized String.
def sanitize_ref_id(x)
	lowered = x.strip.downcase
	lowered.tr(' ', '_').gsub(/\W/, '')
end

- (Object) spaces_before_first_char(s)

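This returns a two-element array: the position of the first real char in a list item, and the inline attribute list (IAL) found right after the list marker, or nil if there is none. The examples below show only the position.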

For example:

'*Hello' # => 1
'* Hello' # => 2
' * Hello' # => 3
' *   Hello' # => 5
'1.Hello' # => 2
' 1.  Hello' # => 5


108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/maruku/string_utils.rb', line 108

# Locates where the content of a list item begins.
#
# s - the list line; its md_type decides how the marker is skipped
#
# For :ulist and :olist lines this skips leading whitespace, the list
# marker (one character for :ulist; the digits plus one character for
# :olist), whitespace, an optional "{...}" inline attribute list, and
# whitespace again.
#
# Returns [position, ial], where ial is the matched "{...}" text or nil.
# For any other md_type it reports the problem through tell_user and
# returns [0, nil].
def spaces_before_first_char(s)
	kind = s.md_type
	unless kind == :ulist || kind == :olist
		tell_user "BUG (my bad): '#{s}' is not a list"
		return [0, nil]
	end

	pos = 0
	skip_blanks = lambda { pos += 1 while s[pos, 1] =~ /\s/ }

	# skip whitespace if present
	skip_blanks.call
	if kind == :ulist
		# skip indicator (+, -, *)
		pos += 1
	else
		# skip digits, then the dot
		pos += 1 while s[pos, 1] =~ /\d/
		pos += 1
	end
	skip_blanks.call

	# find an IAL
	ial = s[pos, s.length - pos][/^\{(.*?)\}/]
	pos += ial.length if ial

	# skip optional whitespace
	skip_blanks.call
	[pos, ial]
end

- (Object) split_lines(s)



31
32
33
# File 'lib/maruku/string_utils.rb', line 31

# Breaks +s+ into lines.
#
# Every carriage return is removed first, then the text is split on "\n".
#
# Returns an Array of Strings without line terminators.
def split_lines(s)
	without_cr = s.delete("\r")
	without_cr.split("\n")
end

- (Object) strip_hashes(s)

Strips initial and final hashes



154
155
156
157
158
159
# File 'lib/maruku/string_utils.rb', line 154

# Removes the leading hashes (as counted by num_leading_hashes) and any
# trailing run of hashes and whitespace from +s+, then strips the result.
#
# The first character left after the leading hashes are removed is always
# kept, even when it is itself a hash or whitespace.
#
# Returns the remaining text as a String.
def strip_hashes(s)
	body = s[num_leading_hashes(s), s.size]
	# Index of the last character that is neither '#' nor whitespace;
	# fall back to 0 so that the first character always survives.
	last_kept = body.rindex(/[^#\s]/) || 0
	body[0, last_kept + 1].strip
end

- (Object) strip_indent(s, n)

Removes at most n columns of leading indentation (a space counts as one column, a tab as `TabSize`).



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/maruku/string_utils.rb', line 173

# Removes up to +n+ columns of indentation from the start of +s+.
#
# A space uses up one column and a tab uses up TabSize columns. Scanning
# stops once the budget is spent or a character that is neither a space
# nor a tab is reached. A tab is removed whole even when fewer than
# TabSize columns remain.
#
# Returns the rest of the string.
def strip_indent(s, n)
	budget = n
	cut = 0
	s.each_char do |ch|
		break unless budget > 0
		case ch
		when ' '  then budget -= 1
		when "\t" then budget -= TabSize
		else break
		end
		cut += 1
	end
	s[cut, s.size]
end

- (Object) unquote(s)

Removes the leading quote marker (">" and one optional whitespace character) from the beginning of each line.



168
169
170
# File 'lib/maruku/string_utils.rb', line 168

# Strips the quote marker from +s+: at the start of every line, a '>' and
# at most one whitespace character after it are removed.
#
# Returns a new String.
def unquote(s)
	quote_marker = /^>\s?/
	s.gsub(quote_marker, '')
end