I'm trying to use rdflib to parse some Turtle files but I'm getting an exception like this:
rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
This is happening with Turtle versions of the NY Times Locations dataset, which I downloaded from http://data.nytimes.com. I think there's some literal (or URI) in the original data that triggers an exception in the rdflib N3/Turtle parser. The original published data file is in RDF/XML which can be parsed just fine by rdflib, but when I convert the data into Turtle using any of three different tools (rdflib, Jena or rapper) the resulting Turtle files cannot be parsed by rdflib.
The full tracebacks I get parsing the different Turtle versions are at the end of this message, as well as in the rdflib-script.txt file in the above directory.
Unfortunately the data is pretty big (170k triples, about 10MB as Turtle) and the exceptions didn't help me locate the problematic part of the data. The data file contains Unicode literals in various non-Western scripts which may or may not be related to the problem.
Any ideas how to fix this?
Best regards,
Osma Suominen
$ python
Python 2.7.3 (default, Aug 1 2012, 05:14:39)
[GCC 4.6.3] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> from rdflib import *
>>> g = Graph()
>>> g.parse('locations-rdflib.ttl', format='n3')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph.py", line 918, in parse
parser.parse(source, self, **args)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 2393, in parse
TurtleParser.parse(self,source,conj_graph,encoding)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 2373, in parse
p.loadStream(source.getByteStream())
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 937, in loadStream
return self.loadBuf(stream.read()) # Not ideal
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 943, in loadBuf
self.feed(buf)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 969, in feed
i = self.directiveOrStatement(s, j)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 987, in directiveOrStatement
return self.checkDot(argstr, j)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 1558, in checkDot
argstr, j, "expected '.' or '}' or ']' at end of statement")
rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
"...lat>,
<http://hu.wikipedia.org/wiki/Eilat>,
^<http://id.wikipedia.org/wiki/Eilat>,
<http://it.wik..."
>>> g = Graph()
>>> g.parse('locations-jena.ttl', format='n3')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph.py", line 918, in parse
parser.parse(source, self, **args)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 2393, in parse
TurtleParser.parse(self,source,conj_graph,encoding)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 2373, in parse
p.loadStream(source.getByteStream())
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 937, in loadStream
return self.loadBuf(stream.read()) # Not ideal
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 943, in loadBuf
self.feed(buf)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 969, in feed
i = self.directiveOrStatement(s, j)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 987, in directiveOrStatement
return self.checkDot(argstr, j)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 1558, in checkDot
argstr, j, "expected '.' or '}' or ']' at end of statement")
rdflib.plugins.parsers.notation3.BadSyntax: at line 3211 of <>:
Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
"...622290083051> ;
cc:license <http://creativecommons.org^/licenses/by/3.0/us/> ;
nyt:mapping_strategy
..."
>>> g = Graph()
>>> g.parse('locations-rapper.ttl', format='n3')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph.py", line 918, in parse
parser.parse(source, self, **args)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 2393, in parse
TurtleParser.parse(self,source,conj_graph,encoding)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 2373, in parse
p.loadStream(source.getByteStream())
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 937, in loadStream
return self.loadBuf(stream.read()) # Not ideal
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 943, in loadBuf
self.feed(buf)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 969, in feed
i = self.directiveOrStatement(s, j)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 987, in directiveOrStatement
return self.checkDot(argstr, j)
File "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py", line 1558, in checkDot
argstr, j, "expected '.' or '}' or ']' at end of statement")
rdflib.plugins.parsers.notation3.BadSyntax: at line 52803 of <>:
Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
"...ms.wikipedia.org/wiki/Berlin>, <http://pt.wikipedia.org/wiki^/Berlim>, <http://qu.wikipedia.org/wiki/Berlin>, <http://ro...."
-- Osma Suominen | Osma.Suomi...@aalto.fi | +358 40 5255 882
Aalto University, Department of Media Technology, Semantic Computing Research Group
Room 2541, Otaniementie 17, Espoo, Finland; P.O. Box 15500, FI-00076 Aalto, Finland
> I'm trying to use rdflib to parse some Turtle files but I'm getting an
> exception like this:
> rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
> This is happening with Turtle versions of the NY Times Locations dataset,
> which I downloaded from http://data.nytimes.com. I think there's some
> literal (or URI) in the original data that triggers an exception in the
> rdflib N3/Turtle parser. The original published data file is in RDF/XML
> which can be parsed just fine by rdflib, but when I convert the data into
> Turtle using any of three different tools (rdflib, Jena or rapper) the
> resulting Turtle files cannot be parsed by rdflib.
> The full tracebacks I get parsing the different Turtle versions are at the
> end of this message, as well as in the rdflib-script.txt file in the above
> directory.
> Unfortunately the data is pretty big (170k triples, about 10MB as Turtle)
> and the exceptions didn't help me locate the problematic part of the data.
> The data file contains Unicode literals in various non-Western scripts which
> may or may not be related to the problem.
> Any ideas how to fix this?
> Best regards,
> Osma Suominen
> $ python
> Python 2.7.3 (default, Aug 1 2012, 05:14:39)
> [GCC 4.6.3] on linux2
> Type "help", "copyright", "credits" or "license" for more information.
>>>> from rdflib import *
>>>> g = Graph()
>>>> g.parse('locations-rdflib.ttl', format='n3')
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph .py",
> line 918, in parse
> parser.parse(source, self, **args)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 2393, in parse
> TurtleParser.parse(self,source,conj_graph,encoding)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 2373, in parse
> p.loadStream(source.getByteStream())
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 937, in loadStream
> return self.loadBuf(stream.read()) # Not ideal
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 943, in loadBuf
> self.feed(buf)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 969, in feed
> i = self.directiveOrStatement(s, j)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 987, in directiveOrStatement
> return self.checkDot(argstr, j)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 1558, in checkDot
> argstr, j, "expected '.' or '}' or ']' at end of statement")
> rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
> "...lat>,
> <http://hu.wikipedia.org/wiki/Eilat>,
> ^<http://id.wikipedia.org/wiki/Eilat>,
> <http://it.wik..."
>>>> g = Graph()
>>>> g.parse('locations-jena.ttl', format='n3')
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph .py",
> line 918, in parse
> parser.parse(source, self, **args)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 2393, in parse
> TurtleParser.parse(self,source,conj_graph,encoding)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 2373, in parse
> p.loadStream(source.getByteStream())
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 937, in loadStream
> return self.loadBuf(stream.read()) # Not ideal
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 943, in loadBuf
> self.feed(buf)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 969, in feed
> i = self.directiveOrStatement(s, j)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 987, in directiveOrStatement
> return self.checkDot(argstr, j)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 1558, in checkDot
> argstr, j, "expected '.' or '}' or ']' at end of statement")
> rdflib.plugins.parsers.notation3.BadSyntax: at line 3211 of <>:
> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
> "...622290083051> ;
> cc:license <http://creativecommons.org^/licenses/by/3.0/us/> ;
> nyt:mapping_strategy
> ..."
>>>> g = Graph()
>>>> g.parse('locations-rapper.ttl', format='n3')
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph .py",
> line 918, in parse
> parser.parse(source, self, **args)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 2393, in parse
> TurtleParser.parse(self,source,conj_graph,encoding)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 2373, in parse
> p.loadStream(source.getByteStream())
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 937, in loadStream
> return self.loadBuf(stream.read()) # Not ideal
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 943, in loadBuf
> self.feed(buf)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 969, in feed
> i = self.directiveOrStatement(s, j)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 987, in directiveOrStatement
> return self.checkDot(argstr, j)
> File
> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
> line 1558, in checkDot
> argstr, j, "expected '.' or '}' or ']' at end of statement")
> rdflib.plugins.parsers.notation3.BadSyntax: at line 52803 of <>:
> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
> "...ms.wikipedia.org/wiki/Berlin>, <http://pt.wikipedia.org/wiki^/Berlim>,
> <http://qu.wikipedia.org/wiki/Berlin>, <http://ro...."
> --
> Osma Suominen | Osma.Suomi...@aalto.fi | +358 40 5255 882
> Aalto University, Department of Media Technology, Semantic Computing
> Research Group
> Room 2541, Otaniementie 17, Espoo, Finland; P.O. Box 15500, FI-00076 Aalto,
> Finland
> --
> You received this message because you are subscribed to the Google Groups
> "rdflib-dev" group.
> To post to this group, send email to rdflib-dev@googlegroups.com.
> To unsubscribe from this group, send email to
> rdflib-dev+unsubscribe@googlegroups.com.
> For more options, visit https://groups.google.com/groups/opt_out.
> This is an error in the original data - clearly the rdf/xml parser is
> less strict. I don't really want to fix this in RDFLib :)
> You can pipe the original input through sed or something and replace
> xml:lang="fr_1793" with xml:lang="fr-1793"?
> Cheers,
> - Gunnar
> On 15 November 2012 10:51, Osma Suominen <osma.suomi...@aalto.fi> wrote:
>> Hi all,
>> I'm trying to use rdflib to parse some Turtle files but I'm getting an
>> exception like this:
>> rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>> This is happening with Turtle versions of the NY Times Locations dataset,
>> which I downloaded from http://data.nytimes.com. I think there's some
>> literal (or URI) in the original data that triggers an exception in the
>> rdflib N3/Turtle parser. The original published data file is in RDF/XML
>> which can be parsed just fine by rdflib, but when I convert the data into
>> Turtle using any of three different tools (rdflib, Jena or rapper) the
>> resulting Turtle files cannot be parsed by rdflib.
>> The full tracebacks I get parsing the different Turtle versions are at the
>> end of this message, as well as in the rdflib-script.txt file in the above
>> directory.
>> Unfortunately the data is pretty big (170k triples, about 10MB as Turtle)
>> and the exceptions didn't help me locate the problematic part of the data.
>> The data file contains Unicode literals in various non-Western scripts which
>> may or may not be related to the problem.
>> Any ideas how to fix this?
>> Best regards,
>> Osma Suominen
>> $ python
>> Python 2.7.3 (default, Aug 1 2012, 05:14:39)
>> [GCC 4.6.3] on linux2
>> Type "help", "copyright", "credits" or "license" for more information.
>>>>> from rdflib import *
>>>>> g = Graph()
>>>>> g.parse('locations-rdflib.ttl', format='n3')
>> Traceback (most recent call last):
>> File "<stdin>", line 1, in <module>
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph .py",
>> line 918, in parse
>> parser.parse(source, self, **args)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 2393, in parse
>> TurtleParser.parse(self,source,conj_graph,encoding)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 2373, in parse
>> p.loadStream(source.getByteStream())
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 937, in loadStream
>> return self.loadBuf(stream.read()) # Not ideal
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 943, in loadBuf
>> self.feed(buf)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 969, in feed
>> i = self.directiveOrStatement(s, j)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 987, in directiveOrStatement
>> return self.checkDot(argstr, j)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 1558, in checkDot
>> argstr, j, "expected '.' or '}' or ']' at end of statement")
>> rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>> "...lat>,
>> <http://hu.wikipedia.org/wiki/Eilat>,
>> ^<http://id.wikipedia.org/wiki/Eilat>,
>> <http://it.wik..."
>>>>> g = Graph()
>>>>> g.parse('locations-jena.ttl', format='n3')
>> Traceback (most recent call last):
>> File "<stdin>", line 1, in <module>
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph .py",
>> line 918, in parse
>> parser.parse(source, self, **args)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 2393, in parse
>> TurtleParser.parse(self,source,conj_graph,encoding)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 2373, in parse
>> p.loadStream(source.getByteStream())
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 937, in loadStream
>> return self.loadBuf(stream.read()) # Not ideal
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 943, in loadBuf
>> self.feed(buf)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 969, in feed
>> i = self.directiveOrStatement(s, j)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 987, in directiveOrStatement
>> return self.checkDot(argstr, j)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 1558, in checkDot
>> argstr, j, "expected '.' or '}' or ']' at end of statement")
>> rdflib.plugins.parsers.notation3.BadSyntax: at line 3211 of <>:
>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>> "...622290083051> ;
>> cc:license <http://creativecommons.org^/licenses/by/3.0/us/> ;
>> nyt:mapping_strategy
>> ..."
>>>>> g = Graph()
>>>>> g.parse('locations-rapper.ttl', format='n3')
>> Traceback (most recent call last):
>> File "<stdin>", line 1, in <module>
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph .py",
>> line 918, in parse
>> parser.parse(source, self, **args)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 2393, in parse
>> TurtleParser.parse(self,source,conj_graph,encoding)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 2373, in parse
>> p.loadStream(source.getByteStream())
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 937, in loadStream
>> return self.loadBuf(stream.read()) # Not ideal
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 943, in loadBuf
>> self.feed(buf)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 969, in feed
>> i = self.directiveOrStatement(s, j)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 987, in directiveOrStatement
>> return self.checkDot(argstr, j)
>> File
>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>> line 1558, in checkDot
>> argstr, j, "expected '.' or '}' or ']' at end of statement")
>> rdflib.plugins.parsers.notation3.BadSyntax: at line 52803 of <>:
>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>> "...ms.wikipedia.org/wiki/Berlin>, <http://pt.wikipedia.org/wiki^/Berlim>,
>> <http://qu.wikipedia.org/wiki/Berlin>, <http://ro...."
>> --
>> Osma Suominen | Osma.Suomi...@aalto.fi | +358 40 5255 882
>> Aalto University, Department of Media Technology, Semantic Computing
>> Research Group
>> Room 2541, Otaniementie 17, Espoo, Finland; P.O. Box 15500, FI-00076 Aalto,
>> Finland
>> --
>> You received this message because you are subscribed to the Google Groups
>> "rdflib-dev" group.
>> To post to this group, send email to rdflib-dev@googlegroups.com.
>> To unsubscribe from this group, send email to
>> rdflib-dev+unsubscribe@googlegroups.com.
>> For more options, visit https://groups.google.com/groups/opt_out.
> --
> You received this message because you are subscribed to the Google Groups "rdflib-dev" group.
> To post to this group, send email to rdflib-dev@googlegroups.com.
> To unsubscribe from this group, send email to rdflib-dev+unsubscribe@googlegroups.com.
> For more options, visit https://groups.google.com/groups/opt_out.
-- Osma Suominen | Osma.Suomi...@aalto.fi | +358 40 5255 882
Aalto University, Department of Media Technology, Semantic Computing Research Group
Room 2541, Otaniementie 17, Espoo, Finland; P.O. Box 15500, FI-00076 Aalto, Finland
>> This is an error in the original data - clearly the rdf/xml parser is
>> less strict. I don't really want to fix this in RDFLib :)
>> You can pipe the original input through sed or something and replace
>> xml:lang="fr_1793" with xml:lang="fr-1793"?
>> Cheers,
>> - Gunnar
>> On 15 November 2012 10:51, Osma Suominen <osma.suomi...@aalto.fi> wrote:
>>> Hi all,
>>> I'm trying to use rdflib to parse some Turtle files but I'm getting an
>>> exception like this:
>>> rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
>>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>>> This is happening with Turtle versions of the NY Times Locations dataset,
>>> which I downloaded from http://data.nytimes.com. I think there's some
>>> literal (or URI) in the original data that triggers an exception in the
>>> rdflib N3/Turtle parser. The original published data file is in RDF/XML
>>> which can be parsed just fine by rdflib, but when I convert the data into
>>> Turtle using any of three different tools (rdflib, Jena or rapper) the
>>> resulting Turtle files cannot be parsed by rdflib.
>>> The full tracebacks I get parsing the different Turtle versions are at the
>>> end of this message, as well as in the rdflib-script.txt file in the above
>>> directory.
>>> Unfortunately the data is pretty big (170k triples, about 10MB as Turtle)
>>> and the exceptions didn't help me locate the problematic part of the data.
>>> The data file contains Unicode literals in various non-Western scripts which
>>> may or may not be related to the problem.
>>> Any ideas how to fix this?
>>> Best regards,
>>> Osma Suominen
>>> $ python
>>> Python 2.7.3 (default, Aug 1 2012, 05:14:39)
>>> [GCC 4.6.3] on linux2
>>> Type "help", "copyright", "credits" or "license" for more information.
>>>>>> from rdflib import *
>>>>>> g = Graph()
>>>>>> g.parse('locations-rdflib.ttl', format='n3')
>>> Traceback (most recent call last):
>>> File "<stdin>", line 1, in <module>
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph .py",
>>> line 918, in parse
>>> parser.parse(source, self, **args)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 2393, in parse
>>> TurtleParser.parse(self,source,conj_graph,encoding)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 2373, in parse
>>> p.loadStream(source.getByteStream())
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 937, in loadStream
>>> return self.loadBuf(stream.read()) # Not ideal
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 943, in loadBuf
>>> self.feed(buf)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 969, in feed
>>> i = self.directiveOrStatement(s, j)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 987, in directiveOrStatement
>>> return self.checkDot(argstr, j)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 1558, in checkDot
>>> argstr, j, "expected '.' or '}' or ']' at end of statement")
>>> rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
>>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>>> "...lat>,
>>> <http://hu.wikipedia.org/wiki/Eilat>,
>>> ^<http://id.wikipedia.org/wiki/Eilat>,
>>> <http://it.wik..."
>>>>>> g = Graph()
>>>>>> g.parse('locations-jena.ttl', format='n3')
>>> Traceback (most recent call last):
>>> File "<stdin>", line 1, in <module>
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph .py",
>>> line 918, in parse
>>> parser.parse(source, self, **args)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 2393, in parse
>>> TurtleParser.parse(self,source,conj_graph,encoding)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 2373, in parse
>>> p.loadStream(source.getByteStream())
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 937, in loadStream
>>> return self.loadBuf(stream.read()) # Not ideal
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 943, in loadBuf
>>> self.feed(buf)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 969, in feed
>>> i = self.directiveOrStatement(s, j)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 987, in directiveOrStatement
>>> return self.checkDot(argstr, j)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 1558, in checkDot
>>> argstr, j, "expected '.' or '}' or ']' at end of statement")
>>> rdflib.plugins.parsers.notation3.BadSyntax: at line 3211 of <>:
>>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>>> "...622290083051> ;
>>> cc:license <http://creativecommons.org^/licenses/by/3.0/us/> ;
>>> nyt:mapping_strategy
>>> ..."
>>>>>> g = Graph()
>>>>>> g.parse('locations-rapper.ttl', format='n3')
>>> Traceback (most recent call last):
>>> File "<stdin>", line 1, in <module>
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph .py",
>>> line 918, in parse
>>> parser.parse(source, self, **args)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 2393, in parse
>>> TurtleParser.parse(self,source,conj_graph,encoding)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 2373, in parse
>>> p.loadStream(source.getByteStream())
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 937, in loadStream
>>> return self.loadBuf(stream.read()) # Not ideal
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 943, in loadBuf
>>> self.feed(buf)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 969, in feed
>>> i = self.directiveOrStatement(s, j)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 987, in directiveOrStatement
>>> return self.checkDot(argstr, j)
>>> File
>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugi ns/parsers/notation3.py",
>>> line 1558, in checkDot
>>> argstr, j, "expected '.' or '}' or ']' at end of statement")
>>> rdflib.plugins.parsers.notation3.BadSyntax: at line 52803 of <>:
>>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>>> "...ms.wikipedia.org/wiki/Berlin>, <http://pt.wikipedia.org/wiki^/Berlim>,
>>> <http://qu.wikipedia.org/wiki/Berlin>, <http://ro...."
>>> --
>>> Osma Suominen | Osma.Suomi...@aalto.fi | +358 40 5255 882
>>> Aalto University, Department of Media Technology, Semantic Computing
>>> Research Group
>>> Room 2541, Otaniementie 17, Espoo, Finland; P.O. Box 15500, FI-00076 Aalto,
>>> Finland
>>> --
>>> You received this message because you are subscribed to the Google Groups
>>> "rdflib-dev" group.
>>> To post to this group, send email to rdflib-dev@googlegroups.com.
>>> To unsubscribe from this group, send email to
>>> rdflib-dev+unsubscribe@googlegroups.com.
>>> For more options, visit https://groups.google.com/groups/opt_out.
>> -- >> You received this message because you are subscribed to the Google Groups "rdflib-dev" group.
>> To post to this group, send email to rdflib-dev@googlegroups.com.
>> To unsubscribe from this group, send email to rdflib-dev+unsubscribe@googlegroups.com.
>> For more options, visit https://groups.google.com/groups/opt_out.
There is perhaps also an issue about input validation here. Recently
we changed the Graph interface to make sure the terms of added triples
were of type rdflib.term.Node.
But there is no language tag validation - so if you are evil you can do:
In [70]: import rdflib
In [71]: g=rdflib.Graph()
In [73]: g.add((rdflib.URIRef("urn:a"), rdflib.RDFS.label,
rdflib.Literal('cake', lang='en ; rdfs:comment "hello!"' )))
This caused some of my SPARQL tests to fail as well. We could validate
and normalize language tags on literal construction time. Some people
may rely on the casing of the langtag to remain when roundtripping
though. (like people expect the lexical representation of their
datatypes to remain)
I'll make a ticket :)
- Gunnar
On 15 November 2012 12:39, Ivan herman <ivan.her...@gmail.com> wrote:
>>> This is an error in the original data - clearly the rdf/xml parser is
>>> less strict. I don't really want to fix this in RDFLib :)
>>> You can pipe the original input through sed or something and replace
>>> xml:lang="fr_1793" with xml:lang="fr-1793"?
>>> Cheers,
>>> - Gunnar
>>> On 15 November 2012 10:51, Osma Suominen <osma.suomi...@aalto.fi> wrote:
>>>> Hi all,
>>>> I'm trying to use rdflib to parse some Turtle files but I'm getting an
>>>> exception like this:
>>>> rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
>>>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>>>> This is happening with Turtle versions of the NY Times Locations dataset,
>>>> which I downloaded from http://data.nytimes.com. I think there's some
>>>> literal (or URI) in the original data that triggers an exception in the
>>>> rdflib N3/Turtle parser. The original published data file is in RDF/XML
>>>> which can be parsed just fine by rdflib, but when I convert the data into
>>>> Turtle using any of three different tools (rdflib, Jena or rapper) the
>>>> resulting Turtle files cannot be parsed by rdflib.
>>>> The full tracebacks I get parsing the different Turtle versions are at the
>>>> end of this message, as well as in the rdflib-script.txt file in the above
>>>> directory.
>>>> Unfortunately the data is pretty big (170k triples, about 10MB as Turtle)
>>>> and the exceptions didn't help me locate the problematic part of the data.
>>>> The data file contains Unicode literals in various non-Western scripts which
>>>> may or may not be related to the problem.
>>>> Any ideas how to fix this?
>>>> Best regards,
>>>> Osma Suominen
>>>> $ python
>>>> Python 2.7.3 (default, Aug 1 2012, 05:14:39)
>>>> [GCC 4.6.3] on linux2
>>>> Type "help", "copyright", "credits" or "license" for more information.
>>>> >>> from rdflib import *
>>>> >>> g = Graph()
>>>> >>> g.parse('locations-rdflib.ttl', format='n3')
>>>> Traceback (most recent call last):
>>>> File "<stdin>", line 1, in <module>
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph.py",
>>>> line 918, in parse
>>>> parser.parse(source, self, **args)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 2393, in parse
>>>> TurtleParser.parse(self,source,conj_graph,encoding)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 2373, in parse
>>>> p.loadStream(source.getByteStream())
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 937, in loadStream
>>>> return self.loadBuf(stream.read()) # Not ideal
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 943, in loadBuf
>>>> self.feed(buf)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 969, in feed
>>>> i = self.directiveOrStatement(s, j)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 987, in directiveOrStatement
>>>> return self.checkDot(argstr, j)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 1558, in checkDot
>>>> argstr, j, "expected '.' or '}' or ']' at end of statement")
>>>> rdflib.plugins.parsers.notation3.BadSyntax: at line 76894 of <>:
>>>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>>>> "...lat>,
>>>> <http://hu.wikipedia.org/wiki/Eilat>,
>>>> ^<http://id.wikipedia.org/wiki/Eilat>,
>>>> <http://it.wik..."
>>>> >>> g = Graph()
>>>> >>> g.parse('locations-jena.ttl', format='n3')
>>>> Traceback (most recent call last):
>>>> File "<stdin>", line 1, in <module>
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph.py",
>>>> line 918, in parse
>>>> parser.parse(source, self, **args)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 2393, in parse
>>>> TurtleParser.parse(self,source,conj_graph,encoding)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 2373, in parse
>>>> p.loadStream(source.getByteStream())
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 937, in loadStream
>>>> return self.loadBuf(stream.read()) # Not ideal
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 943, in loadBuf
>>>> self.feed(buf)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 969, in feed
>>>> i = self.directiveOrStatement(s, j)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 987, in directiveOrStatement
>>>> return self.checkDot(argstr, j)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 1558, in checkDot
>>>> argstr, j, "expected '.' or '}' or ']' at end of statement")
>>>> rdflib.plugins.parsers.notation3.BadSyntax: at line 3211 of <>:
>>>> Bad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:
>>>> "...622290083051> ;
>>>> cc:license <http://creativecommons.org^/licenses/by/3.0/us/> ;
>>>> nyt:mapping_strategy
>>>> ..."
>>>> >>> g = Graph()
>>>> >>> g.parse('locations-rapper.ttl', format='n3')
>>>> Traceback (most recent call last):
>>>> File "<stdin>", line 1, in <module>
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/graph.py",
>>>> line 918, in parse
>>>> parser.parse(source, self, **args)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 2393, in parse
>>>> TurtleParser.parse(self,source,conj_graph,encoding)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 2373, in parse
>>>> p.loadStream(source.getByteStream())
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 937, in loadStream
>>>> return self.loadBuf(stream.read()) # Not ideal
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 943, in loadBuf
>>>> self.feed(buf)
>>>> File
>>>> "/usr/local/lib/python2.7/dist-packages/rdflib-3.2.3-py2.7.egg/rdflib/plugins/parsers/notation3.py",
>>>> line 969, in feed
>>>> i = self.directiveOrStatement(s, j)
>>>> File