Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/direction_detector.rb

Overview

Copyright © 2010-2011, Diaspora Inc. This file is licensed under the
Affero General Public License version 3 or later. See the COPYRIGHT file.

Deeply inspired by gitorious.org/statusnet/mainline/blobs/master/plugins/DirectionDetector/DirectionDetectorPlugin.php

Constant Summary collapse

# Inclusive [first, last] code point ranges whose characters are treated as
# right-to-left. Frozen so the shared table cannot be modified at runtime.
RTL_RANGES =
[
  [1536, 1791], # arabic, persian, urdu, kurdish, ... (U+0600..U+06FF)
  [1872, 1919], # arabic supplement (U+0750..U+077F)
  [65136, 65279], # arabic presentation forms-B (U+FE70..U+FEFF)
  [64336, 65023], # arabic presentation forms-A (U+FB50..U+FDFF)
  [1424, 1535], # hebrew (U+0590..U+05FF)
  # hebrew presentation forms start at U+FB1D; U+FB00..U+FB1C holds latin and
  # armenian ligatures, which are left-to-right and must not be matched
  [64285, 64335], # hebrew presentation forms (U+FB1D..U+FB4F)
  [1792, 1871], # syriac
  [1920, 1983], # thaana
  [1984, 2047], # nko
  [11568, 11647] # tifinagh
].freeze
# Patterns stripped from a string before its direction is detected, so that
# mentions, tags and retweet markers do not take part in the decision.
RTL_CLEANER_REGEXES =
[ /@[^ ]+|#[^ ]+/u, # mention, tag
  /^RT[: ]{1}| RT | RT: |[♺♻:]/u # retweet
].freeze

Instance Method Summary collapse

Instance Method Details

#cleaned_is_rtl? ⇒ Boolean

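Diaspora-specific: removes everything matched by RTL_CLEANER_REGEXES (mentions, tags, retweet markers) from a copy of the string, then checks the direction of what remains.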


38
39
40
41
42
43
44
# File 'lib/direction_detector.rb', line 38

# Detects direction after removing every match of RTL_CLEANER_REGEXES.
# Works on a fresh copy, so the receiver itself is never modified.
#
# @return [Boolean] the result of #is_rtl? on the cleaned copy
def cleaned_is_rtl?
  scrubbed = RTL_CLEANER_REGEXES.inject(String.new(self)) do |text, pattern|
    text.gsub(pattern, '')
  end
  scrubbed.is_rtl?
end

#is_rtl? ⇒ Boolean


23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/direction_detector.rb', line 23

# Decides whether the receiver reads right-to-left.
#
# Every space-separated word votes according to its first character. A strict
# majority of RTL words settles it; on a tie or an LTR majority the first
# character of the whole string decides.
#
# @return [Boolean] false for an empty or whitespace-only string
def is_rtl?
  return false if strip.empty?

  rtl_words, other_words = split(" ").partition { |word| starts_with_rtl_char?(word) }
  rtl_words.size > other_words.size || starts_with_rtl_char?(self)
end

#starts_with_rtl_char?(string = self) ⇒ Boolean


46
47
48
49
50
51
52
53
54
# File 'lib/direction_detector.rb', line 46

# Reports whether the first non-whitespace character of +string+ lies inside
# one of the code point ranges listed in RTL_RANGES.
#
# @param string [String] text to inspect; defaults to the receiver
# @return [Boolean] false when the text is empty or whitespace-only
def starts_with_rtl_char?(string = self)
  stripped = string.strip
  return false if stripped.empty?

  # 'U' (not 'U*') decodes only the leading code point, so the cost does not
  # grow with the string length and later bytes are never interpreted.
  first_code_point = stripped.unpack('U').first
  RTL_RANGES.any? { |low, high| first_code_point >= low && first_code_point <= high }
end