Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions corpus_toolkit_dev/corpus_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def write_corpus(new_dirname,corpus, dirname = False, ending = "txt"):
outf.flush()
outf.close()

ignore_list = [""," ", " ", " ", " "] #list of items we want to ignore in our frequency calculations
ignore_list = [""," ", " ", " ", " "] #list of items we want to ignore in our frequency calculations

def frequency(corpus_list, ignore = ignore_list, calc = 'freq', normed = False): #options for calc are 'freq' or 'range'
freq_dict = {} #empty dictionary
Expand Down Expand Up @@ -430,9 +430,13 @@ def dicter(item,d): #d is a dictinoary
dependent = token.text.lower() #then use the raw form of the word
headt = token.head.text.lower()
else:
dependent = token.lemma_
headt = token.head.lemma_
else:
if lower == True:
dependent = token.lemma_.lower()
headt = token.head.lemma_.lower()
else: # If lower is false, don't lower
dependent = token.lemma_
headt = token.head.lemma_
else: #if we want Spacy's pronoun lemma
dependent = token.lemma_
headt = token.head.lemma_

Expand Down