Source for file N3Parser.php
Documentation is available at N3Parser.php
require_once RDFAPI_INCLUDE_DIR .
'util/Object.php';
require_once RDFAPI_INCLUDE_DIR .
'model/Resource.php';
require_once RDFAPI_INCLUDE_DIR .
'model/Literal.php';
require_once RDFAPI_INCLUDE_DIR .
'model/Statement.php';
// ----------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------
* This parser can parse a subset of n3, reporting triples to a callback function
* or constructing a RAP Model ( http://www.wiwiss.fu-berlin.de/suhl/bizer/rdfapi )
* <li>Standard things, repeated triples ( ; and , ), blank nodes using [ ], self-reference ('<>')</li>
* <li>@prefix mappings</li>
* <li>= maps to owl#sameAs</li>
* <li>a maps to rdf-syntax-ns#type</li>
* <li>Literal datatype and xmlLanguageTag support</li>
* Un-supported N3 Features include:
* <li>Reification using { }</li>
* <li>. and ^ operators for tree traversal</li>
* <li>Any log operators, like log:forAll etc.</li>
* This parser is based on n3.py from Eep, released 2nd March, 2002.
* ( http://infomesh.net/2002/eep/20020302-013802/n3.py )
* This parser is released under the GNU GPL license.
* ( http://www.gnu.org/licenses/gpl.txt )
* @author Sean B. Palmer <sean@mysterylights.com>, Gunnar AA. Grimnes <ggrimnes@csd.abdn.ac.uk>, Daniel Westphal <mail@d-westphal.de>
* @version $Id: fsource_syntax__syntaxN3Parser.php.html 443 2007-06-01 16:25:38Z cax $
/* ==================== Variables ==================== */
/* ==================== Public Methods ==================== */
$Name =
'[A-Za-z0-9_@\.]+[^\.,;\[\]\s ]*';
$QName =
'(?:[A-Za-z][A-Za-z0-9_@\.]*)?:'.
$Name;
$Literal =
'"(\\\"|[^"])*"'; # '"(?:\\"|[^"])*"'
// $Literal = '"[^"\\\\]*(?:\\.\\[^"\\]*)*"'; # '"(?:\\"|[^"])*"'
$LangTag =
'@[A-Za-z\-]*[^ \^\.\;\,]';
$Datatype =
'(\^\^)[^ ,\.;)]+';
$Datatype_URI =
'(\^\^)'.
$URI;
// $LLiteral = '"""[^"\\\\]*(?:(?:.|"(?!""))[^"\\\\]*)*"""';
$LLiteral =
'"""[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""';
// '"""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
$Prefix =
'(?:[A-Za-z][A-Za-z0-9_]*)?:';
$this->RDF_NS =
'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; # for 'a' keyword
$this->DAML_NS =
'http://www.daml.org/2001/03/daml+oil#'; # for '=' keyword
$this->OWL_NS =
'http://www.w3.org/2002/07/owl#';
// $t = array( $LLiteral, $URI); //, $Literal, $PrefixDecl, $QName, $bNode, $Prefix,
// $Univar, 'a', '{', '}', '\(', '\)', '\[', '\]', ',', ';', '\.', $WS, $Comment);
$t =
array( $Datatype_URI,$Datatype,$LLiteral, $URI, $Literal, $PrefixDecl, $QName, $bNode, $Prefix, $Univar, 'a','=', '{', '}', '\(', '\)', '\[', '\]', ',', ';', '\.', $WS, $Comment,$LangTag);
$this->bNodeMap =
array();
* Sets whether blank node labels should be replaced by the generic label from the constants.php file.
* default is "false" -> the used label in n3 is parsed to the model
if (($set===
true) OR ($set===
false)) $this->FixBnodes =
$set;
* This parses a N3 string and prints out the triples
// """Get a string, tokenize, create list, convert to Eep store."""
$stat=
$this->n3tolist($s);
for ($i =
3; $i <
5; $i++
){
if ($t[$i][0]==
'@')$object.=
$t[$i];
if (substr($t[$i],0,2)==
'^^')$object.=
$t[$i];
print
'('.
$t[0].
', '.
$t[1].
', '.
$object.
")\n";
// return [[eep.Article(t[0]), eep.Article(t[1]), eep.Article(t[2])]
* This parses an N3 string and calls func($subject, $predicate, $object) for each triple
// """Get a string, tokenize, create list, convert to Eep store."""
$stat=
$this->n3tolist($s);
for ($i =
3; $i <
5; $i++
){
if ($t[$i][0]==
'@')$object.=
$t[$i];
if (substr($t[$i],0,2)==
'^^')$object.=
$t[$i];
// print "(".$t[0].", ".$t[1].", ".$t[2].")";
$func($t[0],$t[1],$object);
// return [[eep.Article(t[0]), eep.Article(t[1]), eep.Article(t[2])]
* This parses a N3 string and returns a memmodel
// """Get a string, tokenize, create list, convert to Eep store."""
$stat=
$this->n3tolist($s);
$s=
$this->toRDFNode($t[0],$t);
$p=
$this->toRDFNode($t[1],$t);
$o=
$this->toRDFNode($t[2],$t);
// print "(".$t[0].", ".$t[1].", ".$t[2].")";
// return [[eep.Article(t[0]), eep.Article(t[1]), eep.Article(t[2])]
* Generate a new MemModel from an URI or file.
* @return object MemModel
$handle =
fopen($path,'r') or die("N3 Parser: Could not open File: '$path' - Stopped parsing.");
$input .=
fread( $handle, 512 );
/* ==================== Private Methods from here ==================== */
// General list processing functions
* Returns FALSE if argument is a whitespace character
/**
 * Returns true if the string is not a comment.
 *
 * An empty string, or a string whose first non-blank character is '#',
 * yields false; every other string yields true.
 *
 * @param string $s line or token to test
 * @return boolean false for empty strings and N3 comments, true otherwise
 */
function notComment($s) {
    // Loose comparison kept on purpose: null is treated like the empty string.
    if ($s == "") return false;

    // ereg() was removed in PHP 7.0; preg_match() is the PCRE replacement.
    $N3Comment = '/^[ \t]*#/';
    if (preg_match($N3Comment, $s)) return false;

    return true;
}
* Removes all whitespace tokens from list
function filterWs($list) {
// """Filter whitespace from a list."""
* converts a string to its unicode NFC form (e.g. \uHHHH or \UHHHHHHHH).
function str2unicode_nfc($str=
""){
/* try to detect encoding */
for($i=
0,$i_max=
strlen($str);$i<
$i_max;$i++
){
$nr=
0;/* unicode dec nr */
/* 110##### 10###### = 192+x 128+x */
$nr=
((ord($utf8_char[0])-
192)*
64) +
(ord($utf8_char[1])-
128);
/* 1110#### 10###### 10###### = 224+x 128+x 128+x */
$nr=
((ord($utf8_char[0])-
224)*
4096) +
((ord($utf8_char[1])-
128)*
64) +
(ord($utf8_char[2])-
128);
/* 1111#### 10###### 10###### 10###### = 240+x 128+x 128+x 128+x */
$nr=
((ord($utf8_char[0])-
240)*
262144) +
((ord($utf8_char[1])-
128)*
4096) +
((ord($utf8_char[2])-
128)*
64) +
(ord($utf8_char[3])-
128);
/* result (see http://www.w3.org/TR/rdf-testcases/#ntrip_strings) */
if($nr<
9){/* #x0-#x8 (0-8) */
$result.=
"\\u".
sprintf("%04X",$nr);
elseif($nr==
9){/* #x9 (9) */
elseif($nr==
10){/* #xA (10) */
elseif($nr<
13){/* #xB-#xC (11-12) */
$result.=
"\\u".
sprintf("%04X",$nr);
elseif($nr==
13){/* #xD (13) */
elseif($nr<
32){/* #xE-#x1F (14-31) */
$result.=
"\\u".
sprintf("%04X",$nr);
elseif($nr<
34){/* #x20-#x21 (32-33) */
elseif($nr==
34){/* #x22 (34) */
elseif($nr<
92){/* #x23-#x5B (35-91) */
elseif($nr==
92){/* #x5C (92) */
elseif($nr<
127){/* #x5D-#x7E (93-126) */
elseif($nr<
65536){/* #x7F-#xFFFF (128-65535) */
$result.=
"\\u".
sprintf("%04X",$nr);
elseif($nr<
1114112){/* #x10000-#x10FFFF (65536-1114111) */
$result.=
"\\U".
sprintf("%08X",$nr);
/* other chars are not defined => ignore */
* Gets a slice of an array.
* Returns the wanted slice, as well as the remainder of the array.
* e.g. getSpan(['p', 'q', 'r'], 1, 2) gives (['q'], ['p', 'r'])
function getSpan($list, $start, $end) {
return array(array_slice($list, $start,$end-
$start),$this->array_concat($pre,$post));
* Concatenates two arrays
function array_concat($a, $b) {
* Returns an array with all indexes where item appears in list
function posns($list, $item) {
foreach ( $list as $k=>
$v) {
if ($v ===
$item ) $res[]=
$i;
/* More N3 specific functions */
* Returns a list of tokens
// """Notation3 tokenizer. Takes in a string, returns a raw token list."""
if (strlen($s) ==
0) die('Document has no content!');
//$lines=explode("\n",$s);
//$reallines=array_filter($lines, array($this, "notComment"));
// print "LINES: ".join($reallines, " ")." :LINES\n";
//array_walk($reallines, array($this, "trimLine"));
// foreach ($reallines as $l) {
//preg_match_all($this->Tokens, $l, $newres);
//$res=$this->array_concat($res,$newres[0]);
$res=
$this->array_concat($res, array_map('trim', $newres[0]));
* Returns a list with the elements between start and end as one quoted string
* e.g. listify(["a","b","c","d"],1,2) => ["a","b c", "d"]
function listify($list, $start, $end) {
//Re-form a list, merge elements start->end into one quoted element
//Start and end are offsets...
// array_push($s,"\"".join($m," ")."\"");
return $this->array_concat($s,$e);
* Returns an array with prefixes=>namespace mappings
function getPrefixes($list) {
// while '@prefix' in list {
//pos = list.index('@prefix')
$r =
$this->getSpan($list, $pos, ($pos+
4)); # processes the prefix tokens
$prefixes[$binding[$ns]] =
substr($binding[$name],1,-
1);
return array($prefixes, $list);
/**
 * array_walk() callback that rewrites the N3 keyword "a" into the rdf:type URI token.
 *
 * @param string $l token to inspect; overwritten in place when it equals 'a'
 * @param mixed  $p second callback argument (not used)
 */
function replace_a_type(&$l, $p) {
    // Anything other than the bare keyword is left untouched.
    if ($l != 'a') {
        return;
    }
    $l = '<' . $this->RDF_NS . 'type>';
}
/**
 * Callback function for replacing "=" elements with the owl:sameAs URI token.
 *
 * @param string $l token to inspect; overwritten in place when it equals '='
 * @param mixed  $p second callback argument (not used)
 */
function replace_equal(&$l, $p) {
    $isEqualsKeyword = ($l == '=');
    if ($isEqualsKeyword) {
        $l = '<' . $this->OWL_NS . 'sameAs>';
    }
}
/**
 * Callback function for replacing "this" elements with the '<urn:urn-n:this>' URI token.
 *
 * The token is taken by reference, as replace_a_type() and replace_equal() do.
 * Taken by value, the assignment only changed a local copy and the caller's
 * token was never rewritten.
 *
 * @param string $l token to inspect; overwritten in place when it is 'this'
 * @param mixed  $p second callback argument (not used)
 */
function replace_this(&$l, $p) {
    // Strict comparison so that non-string values (e.g. true or 0) never match.
    if ($l === 'this') {
        $l = '<urn:urn-n:this>';
    }
}
* Expands namespace prefixes etc.
function applyStuff($prefixes, $list) {
// Expands namespace prefixes and normalises the tokens of $list, one token per loop pass.
// $prefixes is looked up by prefix ($prefixes[$ns]) and supplies the namespace that is
// prepended to the local name.
// Rewrite every 'a' keyword token to the rdf:type URI (see replace_a_type).
array_walk($list, array($this, 'replace_a_type'));
for ($i=
0;$i<
count($list);$i++
) {
// for i in range(len(list)) {
// if (!strstr('<_"?.;,{}[]()',$list[$i]{0})) {
// if a <> resource occurs, change it to the parsed filename or local URI + timestamp
// Default the server variables when they are not set; they are used to build the
// generated URI just below.
if (!isset
($_SERVER['SERVER_ADDR'])) $_SERVER['SERVER_ADDR']=
'localhost';
if (!isset
($_SERVER['REQUEST_URI'])) $_SERVER['REQUEST_URI']=
'/rdfapi-php';
$list[$i]=
'<http://'.
$_SERVER['SERVER_ADDR'].
$_SERVER['REQUEST_URI'].
'#generate_timestamp_'.
time().
'>';
}else {$list[$i]=
'<'.
$path.
'>';};
// Tokens that neither start with one of the characters < _ " ? . ; , { } [ ] ( ) @
// nor begin with '^^<' get their prefix expanded below.
// NOTE(review): the $str{0} curly-brace offset syntax is deprecated since PHP 7.4 and
// removed in PHP 8.0; $list[$i][0] is the portable spelling.
if ((!strstr('<_"?.;,{}[]()@',$list[$i]{0}))AND (substr($list[$i],0,3)!=
'^^<')) {
if (isset
($prefixes[$ns])) $list[$i] =
'<'.
$prefixes[$ns].
$name.
'>';
else if (isset
($prefixes[substr($ns,2)])) $list[$i] =
'^^'.
$prefixes[substr($ns,2)].
$name.
'';
#die('Prefix not declared:'.$ns);
if ($list[$i]{0} ==
'"') { // Congratulations - it's a literal!
if (substr($list[$i],0,3) ==
'"""') {
if (substr($list[$i],-
3,3) ==
'"""') { // A big literal...
// Strip the outer triple quotes and re-wrap the content in plain double quotes.
$lit =
substr($list[$i],3,-
3);
$list[$i] =
'"'.
$lit.
'"';
else { die ('Incorrect string formatting: '.
substr($list[$i],-
3,3)); }
// NOTE(review): '+' is arithmetic addition in PHP, not string concatenation; the
// other die() call in this function uses '.', so the literal is not appended to
// the message here as presumably intended.
if (strstr($list[$i],"\n")) die('Newline in literal: '+
$list[$i]);
// Datatype tokens ('^^...') whose third character is not '<' are wrapped as '^^<...>'.
if (substr($list[$i],0,2)==
'^^') {
if ($list[$i][2]!=
'<'){$list[$i]=
'^^<'.
substr($list[$i],2).
'>';};
* Returns an array of triples extracted from the list of n3 tokens
function getStatements($list) {
// for($i=0;$i<count($list); $i++) {
$r=
$this->getSpan($list, 0, $pos+
1);
$statements[]=
$statement;
* Gets a list of triples with same subject
* e.g. :Gunnar :firstname "Gunnar" ; :lastname "Grimnes".
$r=
$this->posns($list,';');
$r =
$this->getSpan($list, $pos[0], $pos[1]);
// skip lone semicolons, e.g. "<a> <b> <c> ; ."
if (count($pov) ==
1) continue;
return array($list, $povs);
* Gets a list of triples with same predicate
* e.g. :Gunnar :likes "Cheese", "Wine".
function getObjs($list) {
// for($i=0;$i<count($list); $i++) {
if (isset
($list[$pos+
2])) {
if (@$list[$pos+
2][0]==
'@') $get_array_fields++
;
if (@$list[$pos+
2][0]==
'^') $get_array_fields++
;
if (isset
($list[$pos+
3])) { if (@$list[$pos+
3][0]==
'^') $get_array_fields++
;};
$r=
$this->getSpan($list, $pos, ($pos+
$get_array_fields));
if (!isset
($obj[2])) $obj[2]=
' ';
if (!isset
($obj[3])) $obj[3]=
' ';
return array($list, $objs);
* Does the real work, returns a list of subject, predicate, object triples.
function statementize($list) {
if ($this->debug) print
"Ignored bNode exists statement. $list\n";
if (count($list) ==
3) return array($list);
if (count($list) <
3) die("Error: statement too short!");
// (spo, po), all = getPovs(list), []
//myPo, obj = getObjs(pop)
if (!isset
($myPo[2])) $myPo[2]=
' ';
if (!isset
($myPo[3])) $myPo[3]=
' ';
$all[]=
array($subject,$predicate,$myPo[1],$myPo[2],$myPo[3]);
// all.append([subject, predicate, myPo[1]])
foreach ($obj as $o) $all[]=
array($subject,$predicate, $o[1],$o[2],$o[3]);
// for x in obj: all.append([subject, predicate, x])
$r =
$this->getObjs($spo);
//spo, objs = getObjs(spo)
if(!isset
($spo[3])) $spo[3]=
' ';
if(!isset
($spo[4])) $spo[4]=
' ';
$all[]=
array($subject, $predicate, $spo[2],$spo[3],$spo[4]);
foreach ($objs as $obj) $all[]=
array($subject, $predicate, $obj[1],$obj[2],$obj[3]);
* Makes lists of elements in list into a separate array element.
* e.g. doLists(["a","b","[","c","]","d"], "[","]")=> ["a","b", ["c"], "d"]
function doLists($list, $schar, $echar) {
for ($i=
0;$i<
count($list);$i++
) {
if ($list[$i] ==
$schar) {
$ndict[$nestingLevel] =
array(array($i));
$ndict[$nestingLevel][]=
array($i);
if ($list[$i] ==
$echar) {
$ndict[$nestingLevel]=
array(array($i));
$ndict[$nestingLevel][count($ndict[$nestingLevel])-
1][]=
$i;
# elif type(list[i]) == type([]) {
# list[i] = doLists(list[i], schar, echar)
if ($key >
$biggest) $biggest =
$key;
$tol =
$ndict[$biggest][0];
$list =
$this->listify($list, $tol[0], ($tol[1]+
1));
/**
 * Apply doLists for all different types of list.
 *
 * Runs doLists() once per bracket pair, in the order [ ], { }, ( ),
 * feeding the result of each pass into the next one.
 *
 * @param array $list token list
 * @return mixed whatever the final doLists() pass returns
 */
function listStuff($list) {
    $bracketPairs = array(
        array('[', ']'),
        array('{', '}'),
        array('(', ')')
    );

    foreach ($bracketPairs as $pair) {
        $list = $this->doLists($list, $pair[0], $pair[1]);
    }

    return $list;
}
* Generates a new node id.
* This makes bNodes out of variables like _:a etc.
function fixAnon($list) {
for($i=
0;$i<
count($list);$i++
) {
if (!isset
($this->bNodeMap[$l])) {
} else $a=
$this->bNodeMap[$l];
* This makes [ ] lists into bnodes.
function expandLists($list) {
for($i=
0;$i<
count($list);$i++
) {
if ( $list[$i][0]==
'[' ) {
$list=
$this->array_concat($list, array_slice($prop,1,-
1));
}elseif($list[$i][0]==
'(') {
// local copy of list without "(" and ")"
$fromBnode =
$this->bnodeID();
$toBnode =
$this->bnodeID();
//link first bnode into graph
//loop through list, convert to RDF linked list
for ($idx =
0; $idx <
$count; $idx++
){
// set rdf:rest (nil or next bnode)
if ($idx ==
$count -
1) {
$toBnode =
$this->bnodeID();
die('Only [ ] and () lists are supported!');
* Main work-horse function. This converts a N3 string to a list of statements
// """Convert an N3 string into a list of triples as strings."""
$t =
$this->filterWs($this->toke($s)); # tokenize the stream, and filter whitespace tokens
$r=
$this->getPrefixes($t); # get the prefix directives, and add to a dict
$t=
$this->applyStuff($prefixes, $t);#apply prefixes, keywords, and string formatting
print
"Stuff applied:\n";
$t=
$this->fixAnon($t); # fix _:a anons
$t =
$this->listStuff($t); # apply list stuff: todo
$t=
$this->expandLists($t);
print
"Lists applied:\n";
$t =
$this->getStatements($t); # get all of the "statements" from the stream
$stats=
$this->statementize($stat);
// for x in [statementize(stat) for stat in t] {
// for y in x: result.append(y)
* Constructs a RAP RDFNode from URI/Literal/Bnode
* @return object RDFNode
function toRDFNode($s,$state) {
// $state is scanned from index 3 onwards for a language tag token ('@...') and a
// datatype token ('^^...'); both are applied to the Literal built further down.
for ($i =
3; $i <
count($state); $i++
){
// NOTE(review): the test inspects $state[$i] but the language is always cut from
// $state[3]; the two differ whenever the '@' token sits at another index - confirm
// whether $state[$i] was meant.
if ($state[$i][0]==
'@')$lang=
substr($state[3],1);
if (substr($state[$i],0,2)==
'^^'){
// Drop the first and last character of the datatype when it starts with '<'.
if ($dtype[0]==
'<') $dtype=
substr($dtype,1,-
1);
// The literal text is passed through str2unicode_nfc() before the Literal is created.
$ins=
$this->str2unicode_nfc($ins);
$new_Literal=
new Literal($ins,$lang);
if (isset
($dtype)) $new_Literal->setDatatype($dtype);
// NOTE(review): array_search() returns the matching key or false; negating it also
// treats a falsy key (0, '' or '0') as "not found". A strict "=== false" test would
// distinguish the two cases.
if (($this->FixBnodes) OR (!array_search($s,$this->bNodeMap))) {
Documentation generated on Fri, 1 Jun 2007 16:49:59 +0200 by phpDocumentor 1.3.2