Class: RdfContext::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/rdf_context/parser.rb,
lib/rdf_context/n3parser.rb

Overview

Generic RdfContext Parser class

Direct Known Subclasses

N3Parser, RdfXmlParser, RdfaParser

Instance Attribute Summary (collapse)

Class Method Summary (collapse)

Instance Method Summary (collapse)

Constructor Details

- (Parser) initialize(options = {})

Creates a new parser

Options Hash (options):

  • :processor_graph (Graph) — default: nil

    Graph to record information, warnings and errors.

  • :type (:rdfxml, :html, :n3) — default: nil
  • :strict (Boolean) — default: false

    Raise Error if true; otherwise continue with lax parsing



28
29
30
31
32
33
34
# File 'lib/rdf_context/parser.rb', line 28

# Creates a new parser and stores the caller-supplied settings.
#
# @param options [Hash] parser options
# @option options [Graph] :processor_graph graph used to record information, warnings and errors
# @option options [Array] :debug (deprecated) collector that debug messages are appended to
# @option options [Boolean] :strict raise on errors when true, otherwise parse leniently
def initialize(options = {})
  processor = options[:processor_graph]
  # Only assign when a graph was actually supplied, so the ivar stays unset otherwise
  @processor_graph = processor if processor
  @debug, @strict = options.values_at(:debug, :strict) # XXX :debug is deprecated
  @named_bnodes = {}
end

Instance Attribute Details

- (Array<String>) debug (readonly)



108
# File 'lib/rdf_context/parser.rb', line 108

# Debug message collector (deprecated).
#
# Returns the delegate parser's collector when a delegate exists, otherwise
# this parser's own @debug.
def debug
  return @delegate.debug if @delegate
  @debug
end

- (Nokogiri::XML::Document, #read) doc

Source of parsed document



12
13
14
# File 'lib/rdf_context/parser.rb', line 12

# Source of the parsed document, as stored in @doc (nil until something assigns it).
def doc; @doc; end

- (Graph) graph



16
# File 'lib/rdf_context/parser.rb', line 16

# Graph associated with this parser.
#
# Resolution order: the delegate parser's graph when a delegate exists, then
# this parser's own @graph, and finally a fresh Graph (which is not stored,
# so each such call yields a new instance).
#
# @return [Graph]
def graph
  return @delegate.graph if @delegate
  @graph || Graph.new
end

- (Graph) processor_graph



20
# File 'lib/rdf_context/parser.rb', line 20

# Graph used to record processor information, warnings and errors.
#
# Resolution order: the delegate parser's processor graph when a delegate
# exists, then this parser's own @processor_graph, and finally a fresh Graph
# (which is not stored, so each such call yields a new instance).
#
# @return [Graph]
def processor_graph
  return @delegate.processor_graph if @delegate
  @processor_graph || Graph.new
end

- (RdfContext::URIRef) uri (readonly)

URI of parsed document



8
9
10
# File 'lib/rdf_context/parser.rb', line 8

# URI of the parsed document, as stored in @uri (nil until a parse records one).
def uri; @uri; end

Class Method Details

+ (N3Parser) n3_parser(options = {})

Return N3 Parser instance



112
# File 'lib/rdf_context/parser.rb', line 112

# Convenience factory for an N3 parser.
#
# @param options [Hash] passed unchanged to N3Parser.new
# @return [N3Parser]
def self.n3_parser(options = {})
  N3Parser.new(options)
end

+ (Graph) parse(stream, uri = nil, options = {}) {|triple| ... }

Instantiate Parser and parse document

Options Hash (options):

  • :graph (Graph) — default: Graph.new

    Graph to parse into, otherwise a new Graph instance is created

  • :processor_graph (Graph) — default: nil

    Graph to record information, warnings and errors.

  • :type (:rdfxml, :html, :n3) — default: nil
  • :strict (Boolean) — default: false

    Raise Error if true; otherwise continue with lax parsing

Yields:

  • (triple)

Yield Parameters:

Raises:

  • (Error)

    Raises RdfError if strict

  • (Error)

    Raises RdfError if strict



50
51
52
53
# File 'lib/rdf_context/parser.rb', line 50

# Instantiate a parser with +options+ and immediately parse +stream+ with it.
#
# The same +options+ hash is given to both the constructor and the instance
# level #parse; a block, if supplied, is forwarded to #parse.
#
# @param stream [Object] document source handed to the instance #parse
# @param uri [Object, nil] document URI handed to the instance #parse
# @param options [Hash] parser options
# @yield [triple]
# @return [Object] whatever the instance #parse returns
def self.parse(stream, uri = nil, options = {}, &block) # :yields: triple
  new(options).parse(stream, uri, options, &block)
end

+ (RdfaParser) rdfa_parser(options = {})

Return Rdfa Parser instance



118
# File 'lib/rdf_context/parser.rb', line 118

# Convenience factory for an RDFa parser.
#
# @param options [Hash] passed unchanged to RdfaParser.new
# @return [RdfaParser]
def self.rdfa_parser(options = {})
  RdfaParser.new(options)
end

+ (RdfXmlParser) rdfxml_parser(options = {})

Return RDF/XML Parser instance



115
# File 'lib/rdf_context/parser.rb', line 115

# Convenience factory for an RDF/XML parser.
#
# @param options [Hash] passed unchanged to RdfXmlParser.new
# @return [RdfXmlParser]
def self.rdfxml_parser(options = {})
  RdfXmlParser.new(options)
end

Instance Method Details

- (Object) add_debug(node, message) (protected)

Add debug event to debug array, if specified



162
163
164
# File 'lib/rdf_context/parser.rb', line 162

# Record a debug message about +node+.
#
# Thin wrapper: forwards to add_processor_message with RDFA_NS.Info as the
# message class.
#
# @param node [Object] node the message refers to
# @param message [String] text to record
def add_debug(node, message)
  add_processor_message(node, message, RDFA_NS.Info)
end

- (Object) add_error(node, message, process_class = RDFA_NS.Error) (protected)

Raises:



174
175
176
177
# File 'lib/rdf_context/parser.rb', line 174

# Record an error about +node+ and, in strict mode, abort parsing.
#
# The message is always handed to add_processor_message first, so it is
# recorded even when the method goes on to raise.
#
# @param node [Object] node the error refers to
# @param message [String] text to record (also used as the exception message)
# @param process_class [Object] message class, RDFA_NS.Error by default
# @raise [ParserException] when @strict is truthy
def add_error(node, message, process_class = RDFA_NS.Error)
  add_processor_message(node, message, process_class)
  return unless @strict
  raise ParserException, message
end

- (Object) add_info(node, message, process_class = RDFA_NS.Info) (protected)



166
167
168
# File 'lib/rdf_context/parser.rb', line 166

# Record an informational message about +node+.
#
# Thin wrapper: forwards all three arguments to add_processor_message.
#
# @param node [Object] node the message refers to
# @param message [String] text to record
# @param process_class [Object] message class, RDFA_NS.Info by default
def add_info(node, message, process_class = RDFA_NS.Info)
  add_processor_message(node, message, process_class)
end

- (Object) add_processor_message(node, message, process_class) (protected)



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/rdf_context/parser.rb', line 179

# Record a processor message in every configured sink.
#
# Sinks, in order:
# * standard output, when ::RdfContext::debug? is true;
# * the @debug array, when @debug is an Array;
# * @processor_graph, when set: a new BNode is typed as +process_class+ and
#   given a description, a date, a sequence number and the document +uri+ as
#   context.
#
# An XPathPointer context is added only when +node+ responds to #path.
# node_path accepts arbitrary objects, so nodes without #path are legitimate
# here and previously raised NoMethodError once a processor graph was set.
#
# @param node [Object] node the message refers to (an XML node or anything with #to_s)
# @param message [String] text to record
# @param process_class [Object] class of the message (e.g. RDFA_NS.Info)
def add_processor_message(node, message, process_class)
  to_console = ::RdfContext::debug?
  to_array = @debug.is_a?(Array)
  if to_console || to_array
    # Build the formatted line once and share it between both sinks
    line = "#{node_path(node)}: #{message}"
    puts line if to_console
    @debug << line if to_array
  end

  return unless @processor_graph

  @processor_sequence ||= 0
  n = BNode.new
  @processor_graph << Triple.new(n, RDF_TYPE, process_class)
  @processor_graph << Triple.new(n, DC_NS.description, message)
  @processor_graph << Triple.new(n, DC_NS.date, Literal.build_from(DateTime.now))
  @processor_graph << Triple.new(n, RDFA_NS.sequence, Literal.build_from(@processor_sequence += 1))
  @processor_graph << Triple.new(n, RDFA_NS.context, uri)

  # Only nodes that can report a path get an XPath pointer
  if node.respond_to?(:path)
    nc = BNode.new
    @processor_graph << Triple.new(nc, RDF_TYPE, PTR_NS.XPathPointer)
    @processor_graph << Triple.new(nc, PTR_NS.expression, node.path)
    @processor_graph << Triple.new(n, RDFA_NS.context, nc)
  end
end

- (Array) add_triple(node, subject, predicate, object) (protected)

Add a triple; the object can be a literal, a URI or a BNode.

If the parser is called with a block, triples are passed to the block rather than added to the graph.

Raises:

  • (Error)

    Checks parameter types and raises if they are incorrect if parsing mode is strict.



208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/rdf_context/parser.rb', line 208

# Build a triple and deliver it to the callback or the graph.
#
# When @callback is set the triple is passed to it; otherwise it is appended
# to @graph. An RdfException raised anywhere in that process is logged through
# add_debug and re-raised only when @strict is truthy.
#
# @param node [Object] node being processed, used for debug output
# @param subject [Object] triple subject
# @param predicate [Object] triple predicate
# @param object [Object] triple object
# @return [Triple, nil] the triple, or nil when an RdfException was swallowed
# @raise [RdfException] in strict mode
def add_triple(node, subject, predicate, object)
  triple = Triple.new(subject, predicate, object)
  add_debug(node, "triple: #{triple}")
  # A saved block takes precedence over the graph
  @callback ? @callback.call(triple) : (@graph << triple)
  triple
rescue RdfException => error
  add_debug(node, "add_triple raised #{error.class}: #{error.message}")
  puts error.backtrace if ::RdfContext::debug?
  raise if @strict
end

- (Object) add_warning(node, message, process_class = RDFA_NS.Warning) (protected)



170
171
172
# File 'lib/rdf_context/parser.rb', line 170

# Record a warning about +node+.
#
# Thin wrapper: forwards all three arguments to add_processor_message.
#
# @param node [Object] node the warning refers to
# @param message [String] text to record
# @param process_class [Object] message class, RDFA_NS.Warning by default
def add_warning(node, message, process_class = RDFA_NS.Warning)
  add_processor_message(node, message, process_class)
end

- (:rdfxml, ...) detect_format(stream, uri = nil)

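Heuristically detect the format of the document, first from the file extension of its URI (or of the stream's path) and, failing that, from the start of its content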



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/rdf_context/parser.rb', line 124

# Guess the serialization of a document.
#
# The extension of +uri+ (or of stream.path when no uri is given and the
# stream has one) is tried first. If that is inconclusive, the beginning of
# the content is inspected: up to 1000 characters of a readable stream, which
# is rewound before and after, or the whole of stream.to_s otherwise.
# Anything unrecognised is treated as N3.
#
# @param stream [#read, #to_s] document source
# @param uri [#to_s, nil] location of the document
# @return [Symbol] :rdfxml, :rdfa or :n3
def detect_format(stream, uri = nil)
  uri ||= stream.path if stream.respond_to?(:path)
  location = uri.to_s

  # Cheap check first: trust a recognised file extension
  return :rdfxml if location =~ /\.(rdf|xml)$/
  return :rdfa   if location =~ /\.(html|xhtml)$/
  return :n3     if location =~ /\.(nt|n3|txt)$/

  # Got to look into the file to see
  sample =
    if stream.respond_to?(:read)
      stream.rewind
      head = stream.read(1000)
      stream.rewind
      head
    else
      stream.to_s
    end

  case sample
  when /<\w+:RDF/, /<RDF/ then :rdfxml
  when /<html/i           then :rdfa
  else                         :n3
  end
end

- (Object) node_path(node) (protected)

Figure out the document path, if it is a Nokogiri::XML::Element or Attribute



151
152
153
154
155
156
# File 'lib/rdf_context/parser.rb', line 151

# Human-readable location of +node+ for messages.
#
# Nokogiri XML nodes report their display_path; every other object is simply
# converted with to_s.
#
# @param node [Object] node to describe
# @return [Object] node.display_path for XML nodes, otherwise node.to_s
def node_path(node)
  # Same class test that a `case ... when Nokogiri::XML::Node` performs
  return node.to_s unless Nokogiri::XML::Node === node
  node.display_path
end

- (Graph) parse(stream, uri = nil, options = {}) {|triple| ... }

Parse RDF document from a string or input stream to closure or graph.

If the parser is called with a block, triples are passed to the block rather than added to the graph.

Virtual Class, prototype for Parser subclass.

Options Hash (options):

  • :graph (Graph) — default: Graph.new

    Graph to parse into, otherwise a new Graph instance is created

  • :processor_graph (Graph) — default: nil

    Graph to record information, warnings and errors.

  • :type (:rdfxml, :html, :n3) — default: nil
  • :strict (Boolean) — default: false

    Raise Error if true; otherwise continue with lax parsing

Yields:

  • (triple)

Yield Parameters:

Raises:

  • (Error)

    Raises RdfError if strict

  • (Error)

    Raises RdfError if strict



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/rdf_context/parser.rb', line 74

# Parse an RDF document from a string or input stream to a closure or graph.
#
# Behaviour depends on the receiver's class:
#
# * On the generic Parser, a concrete parser is chosen from options[:type]
#   (guessed with detect_format when absent), created once, remembered in
#   @delegate, and asked to parse. The settings captured by the constructor
#   (@strict, @debug, @processor_graph) and the target graph are copied into
#   +options+ first, unless the caller already supplied them. Note that
#   +options+ is modified in place.
# * On any other class (a concrete parser, presumably calling this via super —
#   confirm against the subclasses) only the common state is recorded: @uri,
#   @strict and @debug.
#
# NOTE(review): the default graph is built with :identifier => @uri before
# @uri is updated from +uri+, so it reflects a previous parse (or nil).
#
# @param stream [#read, #to_s] document source
# @param uri [#to_s, nil] URI of the document
# @param options [Hash] parser options
# @option options [Graph] :graph graph to parse into; a new Graph is created otherwise
# @option options [Graph] :processor_graph graph to record information, warnings and errors
# @option options [Symbol, String] :type one of the names accepted below; unknown names fall back to RDF/XML
# @option options [Boolean] :strict raise on errors when true, otherwise parse leniently
# @option options [Array] :debug (deprecated) collector for debug messages
# @yield [triple] forwarded to the delegate parser
# @return [Object] for the generic Parser, whatever the delegate's parse returns
def parse(stream, uri = nil, options = {}, &block) # :yields: triple
  @graph = options[:graph] || Graph.new(:identifier => @uri)
  if self.class == Parser

    # Hand constructor-level settings down to the delegate unless overridden
    options[:strict] ||= @strict if @strict
    options[:graph] ||= @graph
    options[:debug] ||= @debug if @debug  # XXX deprecated
    # Without this, a processor graph given only to the constructor never
    # reaches the delegate and its messages are lost
    options[:processor_graph] ||= @processor_graph if @processor_graph
    # Intuit type, if not provided
    options[:type] ||= detect_format(stream, uri)

    # Create a delegate of a specific parser class
    @delegate ||= case options[:type].to_s
    when "n3", "ntriples", "turtle", "ttl", "notation3" then N3Parser.new(options)
    when "rdfa", "html", "xhtml"                        then RdfaParser.new(options)
    when "xml", "rdf", "rdfxml"                         then RdfXmlParser.new(options)
    else                                                     RdfXmlParser.new(options)
      # raise ParserException.new("type option must be one of :rdfxml, :html, or :n3")
    end
    @delegate.parse(stream, uri, options, &block)
  else
    # Common parser operations
    @uri = URIRef.new(uri.to_s) unless uri.nil?
    @strict = options[:strict] if options.has_key?(:strict)
    @debug = options[:debug] if options.has_key?(:debug)
  end
end