# Makefile for solvertools (forked from rspeer/solvertools).
# Builds the word lists under data/wordlists and the search index under
# data/search.
# Almost every recipe in this file writes its result with "> $@". If such a
# recipe fails part-way, the truncated file would otherwise look up to date on
# the next run; this special target makes make delete it instead.
.DELETE_ON_ERROR:

# Interpreter used to run the helper scripts under scripts/.
# Override on the command line, e.g. `make PYTHON=python3.11`.
PYTHON := python3

# Data locations, given as relative paths.
SEARCH_DIR := data/search
WORDLIST_DIR := data/wordlists
CORPUS_DIR := data/corpora
# `all` and `clean` are commands, not files: declare them phony so a stray
# file named "all" or "clean" can never make them look up to date.
.PHONY: all clean

# Default goal: build every word list and the search index.
all: wordlists search

# Remove the generated word lists (top-level *.txt only; the glob does not
# descend into raw/) and the search index directory. -f keeps `make clean`
# from failing when there is nothing to delete.
clean:
	rm -f $(WORDLIST_DIR)/*.txt
	rm -rf $(SEARCH_DIR)
# Every generated word list that feeds combined.txt, in build order.
# Each name appears exactly once, and the list ends without a trailing
# backslash so the assignment cannot swallow the line that follows it.
WORDLISTS := $(addprefix $(WORDLIST_DIR)/, \
    enable.txt \
    twl06.txt \
    google-books.freq.txt \
    google-books-1grams.txt \
    google-books-1grams.freq.txt \
    google-books.txt \
    wikipedia-en-titles.txt \
    wikipedia-en-links.txt \
    wordnet.txt \
    wordfreq.txt \
    csw-apr07.txt \
    npl-allwords.txt \
    wordfreq.freq.txt)
# Aggregate targets: names for groups of files, not files themselves.
.PHONY: search wordlists

# The search index is considered built once its _MAIN_1.toc file exists.
search: $(SEARCH_DIR)/_MAIN_1.toc

# All individual word lists plus the two combined lists derived from them.
wordlists: $(WORDLISTS) $(WORDLIST_DIR)/combined.txt $(WORDLIST_DIR)/combined.freq.txt
# Frequency-ranked Google Books list built from the raw 1-gram and 2-gram
# counts. Keeps lines that start with uppercase letters, apostrophes or spaces
# followed by ",<digit>", then sorts numerically descending on the second
# comma-separated field.
# `grep -E` replaces the obsolescent `egrep`; -h drops the file-name prefix
# grep would add when given more than one input. LC_ALL=C applies to grep
# only, not to the sort stage.
$(WORDLIST_DIR)/google-books.freq.txt: \
    $(WORDLIST_DIR)/raw/google-books-1grams.txt \
    $(WORDLIST_DIR)/raw/google-books-2grams.txt
	LC_ALL=C grep -E -h "^[A-Z' ]+,[0-9]" $^ | sort -nrk 2 -t "," > $@
# Byte-order (C locale) sorted view of the frequency-ranked Google Books list.
$(WORDLIST_DIR)/google-books.txt: $(WORDLIST_DIR)/google-books.freq.txt
	LC_ALL=C sort -o $@ $<
# Google Books 1-grams only: keeps lines that start with one or more uppercase
# letters or apostrophes followed by a comma, then sorts them.
# `grep -E` replaces the obsolescent `egrep`. LC_ALL=C applies to grep only;
# sort runs in the ambient locale.
$(WORDLIST_DIR)/google-books-1grams.txt: $(WORDLIST_DIR)/raw/google-books-1grams.txt
	LC_ALL=C grep -E -h "^[A-Z']+," $^ | sort > $@
# Frequency-ranked view of combined.txt: sort numerically descending on the
# second comma-separated field, then drop every line that ends in ",1".
$(WORDLIST_DIR)/combined.freq.txt: $(WORDLIST_DIR)/combined.txt
	sort -t "," -k 2 -n -r $< \
	  | grep -v ",1$$" > $@
# NOTE: wordfreq.txt and wordfreq.freq.txt are each defined once, further
# down, from raw/wordfreq-1.6-en.txt. Do not add a second recipe for them
# here: GNU make keeps only the last recipe given for a target (with an
# "overriding recipe" warning) and still merges the prerequisites of both.
# Frequency-ranked view of the 1-gram list: numeric, descending sort on the
# second comma-separated field.
$(WORDLIST_DIR)/google-books-1grams.freq.txt: $(WORDLIST_DIR)/google-books-1grams.txt
	sort -t "," -k 2 -n -r $< > $@
# ENABLE word list: upper-case the raw list, then pass it through
# shell/freq1.sh. The helper is a prerequisite, so editing it triggers a
# rebuild.
$(WORDLIST_DIR)/enable.txt: \
    $(WORDLIST_DIR)/raw/enable.txt \
    shell/freq1.sh
	tr a-z A-Z < $< \
	  | shell/freq1.sh > $@
# Byte-order (C locale) sorted view of the wordfreq list.
$(WORDLIST_DIR)/wordfreq.txt: $(WORDLIST_DIR)/wordfreq.freq.txt
	LC_ALL=C sort -o $@ $<

# The frequency-ordered wordfreq list is a verbatim copy of the raw
# wordfreq-1.6-en.txt file.
$(WORDLIST_DIR)/wordfreq.freq.txt: $(WORDLIST_DIR)/raw/wordfreq-1.6-en.txt
	cp $< $@
# CSW (April 2007) word list: the raw list is passed through shell/freq1.sh
# unchanged. Unlike enable.txt and twl06.txt there is no upper-casing step.
$(WORDLIST_DIR)/csw-apr07.txt: \
    $(WORDLIST_DIR)/raw/csw-apr07.txt \
    shell/freq1.sh
	shell/freq1.sh < $< > $@
# TWL06 word list: upper-case the raw list, then pass it through
# shell/freq1.sh.
$(WORDLIST_DIR)/twl06.txt: \
    $(WORDLIST_DIR)/raw/twl06.txt \
    shell/freq1.sh
	tr a-z A-Z < $< \
	  | shell/freq1.sh > $@
# Wikipedia article titles: drop every title containing three or more spaces
# (grep -v on " .* .* "), then pass the rest through shell/freq1.sh.
# shell/freq1.sh is listed as a prerequisite, as in the other freq1.sh rules,
# so a change to the helper rebuilds this list; the raw file stays first so
# $< still names it. `grep -E` replaces the obsolescent `egrep`.
$(WORDLIST_DIR)/wikipedia-en-titles.txt: $(WORDLIST_DIR)/raw/wikipedia-en-titles.txt shell/freq1.sh
	grep -E -hv " .* .* " $< | shell/freq1.sh > $@
# Wikipedia link counts: decompress the bzip2 archive to stdout and feed it to
# scripts/transform_wp_freq.py, whose stdout becomes the word list.
# The script is listed as a prerequisite, as build_combined.py is for
# combined.txt, so editing it rebuilds this list; the archive stays first so
# $< still names it.
$(WORDLIST_DIR)/wikipedia-en-links.txt: $(WORDLIST_DIR)/raw/wp-links-sorted.txt.bz2 scripts/transform_wp_freq.py
	bunzip2 -c $< | $(PYTHON) scripts/transform_wp_freq.py > $@
# WordNet entries: keep lines made up solely of ASCII letters, digits,
# apostrophes, slashes, spaces and hyphens, upper-case them, then pass them
# through shell/freq1.sh.
# shell/freq1.sh is listed as a prerequisite, as in the other freq1.sh rules;
# the raw file stays first so $< still names it. `grep -E` replaces the
# obsolescent `egrep`. "$$" is make's escape for a literal "$" in the regex.
$(WORDLIST_DIR)/wordnet.txt: $(WORDLIST_DIR)/raw/wordnet.txt shell/freq1.sh
	LC_ALL=C grep -E -h "^[A-Za-z0-9'/ -]+$$" $< | tr a-z A-Z | shell/freq1.sh > $@
# NPL "allwords" list: keep lines made up solely of ASCII letters, digits,
# apostrophes, spaces and hyphens, upper-case them, then pass them through
# shell/freq1.sh. Note the raw file is named npl_allwords2.txt.
# shell/freq1.sh is listed as a prerequisite, as in the other freq1.sh rules;
# the raw file stays first so $< still names it. `grep -E` replaces the
# obsolescent `egrep`. "$$" is make's escape for a literal "$" in the regex.
$(WORDLIST_DIR)/npl-allwords.txt: $(WORDLIST_DIR)/raw/npl_allwords2.txt shell/freq1.sh
	LC_ALL=C grep -E -h "^[A-Za-z0-9' -]+$$" $< | tr a-z A-Z | shell/freq1.sh > $@
# Combined word list, rebuilt whenever any individual list or the build script
# changes. The script is the last prerequisite, hence $(lastword $^).
# NOTE(review): the recipe passes no file names and does not redirect output,
# so build_combined.py presumably locates the lists and writes combined.txt
# itself -- confirm against the script.
$(WORDLIST_DIR)/combined.txt: \
    $(WORDLISTS) \
    scripts/build_combined.py
	$(PYTHON) $(lastword $^)
# Search index, rebuilt when the indexing script or the crossword-clue corpus
# changes. The script is the first prerequisite, so $< names it.
# NOTE(review): the recipe takes no arguments; the script presumably writes
# the index, including _MAIN_1.toc, under $(SEARCH_DIR) itself -- confirm.
$(SEARCH_DIR)/_MAIN_1.toc: \
    scripts/build_search_index.py \
    $(CORPUS_DIR)/crossword_clues.txt
	$(PYTHON) $<