from pattern.nl import parse, tokenize
# Parse a Dutch sentence with the default tokenizer.
# NOTE: the original used the Python-2-only `print` statement; the
# parenthesized single-argument form below behaves identically on
# Python 2 and is valid Python 3.
print(parse("Zeer vreemd!"))
def my_tokenizer(s):
    """Return a list of sentences, each sentence a list of word tokens.

    Punctuation marks (",", ".", "!") are detached from the preceding
    word by inserting a space, then the text is split into sentences on
    "." and each sentence into tokens on single spaces.
    """
    # Pad each punctuation mark so it becomes its own token.
    for mark in (",", ".", "!"):
        s = s.replace(mark, " " + mark)
    # Sentences are delimited by "."; tokens by single spaces.
    return [chunk.strip().split(" ") for chunk in s.split(".")]
# Feed pre-tokenized input to the parser; tokenize=False tells parse()
# to skip its own tokenization step.
# NOTE: converted from the Python-2-only `print` statement to the
# parenthesized form, which works identically on Python 2 and 3.
print(parse(my_tokenizer("Zeer vreemd!"), tokenize=False))