From 4832334dee3afddcea2c2882d932bb657f85d91b Mon Sep 17 00:00:00 2001
From: prrao87 <prrao87@gmail.com>
Date: Wed, 5 Aug 2020 17:23:16 -0700
Subject: [PATCH 1/3] Fix lowercasing of lemmas in dependency bigrams

---
 corpus_toolkit_dev/corpus_tools.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/corpus_toolkit_dev/corpus_tools.py b/corpus_toolkit_dev/corpus_tools.py
index bf75a42..37d5e87 100644
--- a/corpus_toolkit_dev/corpus_tools.py
+++ b/corpus_toolkit_dev/corpus_tools.py
@@ -30,7 +30,7 @@
 
 dirsep = os.path.sep
 default_punct_list = [",",".","?","'",'"',"!",":",";","(",")","[","]","''","``","--"] #we can add more items to this if needed
-default_space_list = ["\n","\t","    ","   ","  "]
+default_space_list = ["\n","\t","	","   ","  "]
 
 def doc_check(f_list,dirname,ending):
 	if len(f_list) == 0:
@@ -191,7 +191,7 @@ def write_corpus(new_dirname,corpus, dirname = False, ending = "txt"):
 		outf.flush()
 		outf.close()
 
-ignore_list = [""," ", "  ", "   ", "    "] #list of items we want to ignore in our frequency calculations
+ignore_list = [""," ", "  ", "   ", "	"] #list of items we want to ignore in our frequency calculations
 
 def frequency(corpus_list, ignore = ignore_list, calc = 'freq', normed = False): #options for calc are 'freq' or 'range'
 	freq_dict = {} #empty dictionary
@@ -430,11 +430,12 @@ def dicter(item,d): #d is a dictinoary
 								dependent = token.text.lower() #then use the raw form of the word
 								headt = token.head.text.lower()
 							else:
-								dependent = token.lemma_
-								headt = token.head.lemma_
-						else:
-							dependent = token.lemma_
-							headt = token.head.lemma_
+								if lower:
+									dependent = token.lemma_.lower()
+									headt = token.head.lemma_.lower()
+								else:  # If lower is false, don't lower
+									dependent = token.lemma_
+									headt = token.head.lemma_
 					
 					if lemma == False: #if lemma is false, use the token form
 						if lower == True: #if lower is true, lower it

From 2e62b0b31ef3465306043599178ed106e63003ef Mon Sep 17 00:00:00 2001
From: prrao87 <prrao87@gmail.com>
Date: Wed, 5 Aug 2020 17:26:54 -0700
Subject: [PATCH 2/3] Fix space_list and ignore_list to be consistent with
 original version

---
 corpus_toolkit_dev/corpus_tools.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/corpus_toolkit_dev/corpus_tools.py b/corpus_toolkit_dev/corpus_tools.py
index 37d5e87..107ff06 100644
--- a/corpus_toolkit_dev/corpus_tools.py
+++ b/corpus_toolkit_dev/corpus_tools.py
@@ -30,7 +30,7 @@
 
 dirsep = os.path.sep
 default_punct_list = [",",".","?","'",'"',"!",":",";","(",")","[","]","''","``","--"] #we can add more items to this if needed
-default_space_list = ["\n","\t","	","   ","  "]
+default_space_list = ["\n","\t","    ","   ","  "]
 
 def doc_check(f_list,dirname,ending):
 	if len(f_list) == 0:
@@ -191,7 +191,7 @@ def write_corpus(new_dirname,corpus, dirname = False, ending = "txt"):
 		outf.flush()
 		outf.close()
 
-ignore_list = [""," ", "  ", "   ", "	"] #list of items we want to ignore in our frequency calculations
+ignore_list = [""," ", "  ", "   ", "    "]  #list of items we want to ignore in our frequency calculations
 
 def frequency(corpus_list, ignore = ignore_list, calc = 'freq', normed = False): #options for calc are 'freq' or 'range'
 	freq_dict = {} #empty dictionary

From d6b64d283e5b977ae5d89c71b9db74dce39dedc5 Mon Sep 17 00:00:00 2001
From: prrao87 <prrao87@gmail.com>
Date: Wed, 5 Aug 2020 17:45:54 -0700
Subject: [PATCH 3/3] Restore missing else block for the -PRON- lemma case

---
 corpus_toolkit_dev/corpus_tools.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/corpus_toolkit_dev/corpus_tools.py b/corpus_toolkit_dev/corpus_tools.py
index 107ff06..6ccccd2 100644
--- a/corpus_toolkit_dev/corpus_tools.py
+++ b/corpus_toolkit_dev/corpus_tools.py
@@ -430,12 +430,15 @@ def dicter(item,d): #d is a dictinoary
 								dependent = token.text.lower() #then use the raw form of the word
 								headt = token.head.text.lower()
 							else:
-								if lower:
+								if lower == True:
 									dependent = token.lemma_.lower()
 									headt = token.head.lemma_.lower()
 								else:  # If lower is false, don't lower
 									dependent = token.lemma_
 									headt = token.head.lemma_
+						else:  #if we want Spacy's pronoun lemma
+							dependent = token.lemma_
+							headt = token.head.lemma_
 					
 					if lemma == False: #if lemma is false, use the token form
 						if lower == True: #if lower is true, lower it