-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdecision_sums_sorting.py
More file actions
93 lines (80 loc) · 3.64 KB
/
decision_sums_sorting.py
File metadata and controls
93 lines (80 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 19 14:39:56 2017
@author: Torsten Gross @ Humboldt University Berlin / Charite Universitaetsmedizin Berlin
A sum where each of the N summands can be independently chosen from two choices
yields 2^N possible summation outcomes. This is an O(K^2)-algorithm that finds
the K smallest/largest of these sums by evading the enumeration of all sums.
As described in:
"Sorting sums over binary decision summands"
Torsten Gross, Nils Bluethgen, 2017
See comments at end of file for an example.
"""
import numpy as np
def sort_choice_sum_gen(scores):
N=np.shape(scores)[0]
score_sort=np.argsort(scores,1)
delta=np.abs(scores[:,0]-scores[:,1])
sorted_delta_inds=np.argsort(delta)
sorted_delta=delta[sorted_delta_inds]
sorted_delta_inds=np.argsort(sorted_delta_inds)
delta_diff=sorted_delta[1:]-sorted_delta[:-1]
pendings=[N*(False,)]
pending_sums=[np.sum(scores.min(1))]
while True:
#extract next choice
next_choice=pendings.pop()
next_sum=pending_sums.pop()
yield next_sum,tuple(score_sort[i,int(next_choice[j])] for i,j in enumerate(sorted_delta_inds))
if np.all(next_choice):
break #It would be enough to do half
#generate putative choices
putatives=[]
putative_sums=[]
if not next_choice[0]:
new_putative=(True,)+next_choice[1:]
if new_putative not in pendings: #sort out duplicates
putatives.append((True,)+next_choice[1:])
putative_sums.append(next_sum+sorted_delta[0])
for ind in range(N-1):
if (next_choice[ind],next_choice[ind+1])==(True,False):
new_putative=next_choice[:ind]+ (False,True) + next_choice[ind+2:]
if new_putative not in pendings: #sort out duplicates
putatives.append(new_putative)
putative_sums.append(next_sum+delta_diff[ind])
if not putatives:
continue #no new putatives, so just stay with the current pendings
#merge putative choices into pending choices
putative_sorted_inds=list(np.argsort(putative_sums)) #from small to large
pending_ind,shift,putatives_is_empty=0,0,False
putative_sorted_ind=putative_sorted_inds.pop()
original_len_pendings=len(pending_sums)
while pending_ind < original_len_pendings:
while putative_sums[putative_sorted_ind] >= pending_sums[pending_ind+shift]:
pendings.insert(pending_ind+shift,putatives[putative_sorted_ind])
pending_sums.insert(pending_ind+shift,putative_sums[putative_sorted_ind])
shift+=1
if putative_sorted_inds:
putative_sorted_ind=putative_sorted_inds.pop()
else:
putatives_is_empty=True
break #stop merging because there are no more putative choices
if putatives_is_empty:
break #stop merging because there are no more putative choices
pending_ind+=1
#append_the_rest
if not putatives_is_empty:
for putative_sorted_ind in reversed(putative_sorted_inds+[putative_sorted_ind]): #from large to small
pendings.append(putatives[putative_sorted_ind])
pending_sums.append(putative_sums[putative_sorted_ind])
##############
#example run:#
##############
#
#N=100
#K=10000
#scores=np.random.rand(N,2)
#sort_gen=sort_choice_sum_gen(scores)
#for i in range(K):
# curr_score,curr_choice=sort_gen.next()
#