From 211d9554086e3d2c788569d933956cf2f6e8cade Mon Sep 17 00:00:00 2001
From: Joab
Date: Fri, 31 May 2019 11:54:22 -0300
Subject: [PATCH] Tokenize: fix NoneType error and filter whitespace-only tokens after regex split

---
 memorynetwork.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/memorynetwork.py b/memorynetwork.py
index c7f7e1b..f8b0809 100644
--- memorynetwork.py
+++ memorynetwork.py
@@ -28,7 +28,7 @@ def tokenize(sent):
     >>> tokenize('Bob dropped the apple. Where is the apple?')
     ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?']
     '''
-    return [x.strip() for x in re.split('(\W+)?', sent) if x.strip()]
+    return [x.strip() for x in re.split('(\w+)?', sent) if x and not x.isspace()]
 
 
 def parse_stories(lines, only_supporting=False):