diff --git a/corpus_toolkit_dev/corpus_tools.py b/corpus_toolkit_dev/corpus_tools.py index bf75a42..6ccccd2 100644 --- a/corpus_toolkit_dev/corpus_tools.py +++ b/corpus_toolkit_dev/corpus_tools.py @@ -191,7 +191,7 @@ def write_corpus(new_dirname,corpus, dirname = False, ending = "txt"): outf.flush() outf.close() -ignore_list = [""," ", " ", " ", " "] #list of items we want to ignore in our frequency calculations +ignore_list = [""," ", " ", " ", " "] #list of items we want to ignore in our frequency calculations def frequency(corpus_list, ignore = ignore_list, calc = 'freq', normed = False): #options for calc are 'freq' or 'range' freq_dict = {} #empty dictionary @@ -430,9 +430,13 @@ def dicter(item,d): #d is a dictinoary dependent = token.text.lower() #then use the raw form of the word headt = token.head.text.lower() else: - dependent = token.lemma_ - headt = token.head.lemma_ - else: + if lower == True: + dependent = token.lemma_.lower() + headt = token.head.lemma_.lower() + else: # If lower is false, don't lower + dependent = token.lemma_ + headt = token.head.lemma_ + else: #if we want Spacy's pronoun lemma dependent = token.lemma_ headt = token.head.lemma_