diff --git a/memorynetwork.py b/memorynetwork.py index c7f7e1b..f8b0809 100644 --- a/memorynetwork.py +++ b/memorynetwork.py @@ -28,7 +28,7 @@ def tokenize(sent): >>> tokenize('Bob dropped the apple. Where is the apple?') ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?'] ''' - return [x.strip() for x in re.split('(\W+)?', sent) if x.strip()] + return [x.strip() for x in re.split('(\w+)?', sent) if x and not x.isspace()] def parse_stories(lines, only_supporting=False):