-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstring_grouping.py
More file actions
74 lines (52 loc) · 1.92 KB
/
string_grouping.py
File metadata and controls
74 lines (52 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding:utf-8 -*-
# Python3
def string_similarity(str1, str2):
length = min(len(str1), len(str2))
for ii in range(length):
if str1[ii] != str2[ii]:
return ii
return length
def string_div2(strings, min_size): # return: gsmall, glarge
assert 2 * min_size <= len(strings)
strings.sort()
sims = [0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF] * len(strings)
for ii in range(1, len(strings)):
sims[ii] = string_similarity(strings[ii-1], strings[ii])
min_sim = min(sims)
divl = 0
for div in range(1, len(strings)):
if sims[div] == min_sim:
divl = div
if div >= min_size and (len(strings)-div) >= min_size:
gsmall, glarge = strings[:divl], strings[divl:]
if len(gsmall) > len(glarge):
gsmall, glarge = glarge, gsmall
return gsmall, glarge
gsmall, glarge = strings[:divl], strings[divl:]
if len(gsmall) > len(glarge):
gsmall, glarge = glarge, gsmall
gt, glarge = string_div2(glarge, min_size-len(gsmall))
gsmall += gt
if len(gsmall) > len(glarge):
gsmall, glarge = glarge, gsmall
return gsmall, glarge
def string_group(strings, min_size, max_size):
assert 2*min_size <= max_size
if len(strings) > max_size:
large_groups = [strings]
small_groups = []
else:
large_groups = []
small_groups = [strings]
while bool(large_groups):
group0 = large_groups.pop()
gsmall, glarge = string_div2(group0, min_size)
if len(gsmall) > max_size:
large_groups.append(gsmall)
else:
small_groups.append(gsmall)
if len(glarge) > max_size:
large_groups.append(glarge)
else:
small_groups.append(glarge)
return small_groups