-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathST_ECLAT.py
More file actions
107 lines (88 loc) · 2.71 KB
/
ST_ECLAT.py
File metadata and controls
107 lines (88 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python
# coding: utf-8
# In[8]:
import sys
# Module-level settings and shared state. The two numeric settings are read
# from the command line as soon as the module is loaded.

# Not written or read by any active statement in the code visible here; it is
# only mentioned in commented-out lines.
FreqItems = dict()
# Command-line argument 1 as a float. verify() counts a pair of consecutive
# sorted ids when their difference is <= per.
per=float(sys.argv[1])
# Command-line argument 2 as a float. Read_Data() and eclat() keep an item or
# itemset only when its verify() result is >= per_freq.
per_freq=float(sys.argv[2])
# Not written or read by any active statement in the code visible here.
final_data=dict()
# Maps an item to the set of items listed with it in the neighbourhood file.
# It is filled in by the __main__ section and looked up by eclat().
nbh_dict=dict()
def eclat(prefix, items, nbh_prefix):
    """Generate itemsets depth-first by extending ``prefix`` with ``items``.

    Candidates are taken from the END of ``items`` one at a time. Each one is
    reported together with ``prefix`` and then extended recursively with the
    candidates still left in ``items``.

    Args:
        prefix: List of the items already in the pattern. It is never
            mutated; every extension builds a new list.
        items: List of ``(item, [ids, score])`` pairs, where ``ids`` supports
            the ``&`` operator. The list is consumed: it is empty once the
            generator is exhausted.
        nbh_prefix: Set of items still allowed to extend ``prefix``.

    Yields:
        ``(pattern, score)`` tuples, where ``pattern`` is ``prefix`` plus one
        item and ``score`` is the second element of that item's payload.

    Raises:
        KeyError: If an item has no entry in the module-level ``nbh_dict``.
    """
    while items:
        item, item_info = items.pop()
        yield (prefix + [item], item_info[1])

        # An extension must be listed in nbh_dict for every item already in
        # the pattern, so the allowed set shrinks as the pattern grows.
        allowed = nbh_prefix & nbh_dict[item]

        extensions = []
        for other, other_info in items:
            if other not in allowed:
                continue
            shared_ids = item_info[0] & other_info[0]
            score = verify(shared_ids)
            if score >= per_freq:
                extensions.append((other, [shared_ids, score]))

        # Ascending order combined with pop() from the end means the
        # highest-scoring extension is explored first.
        extensions.sort(key=lambda entry: entry[1][1])
        yield from eclat(prefix + [item], extensions, allowed)
def Read_Data(filename):
    """Load a transaction file into a filtered item -> ids mapping.

    Every line is split on whitespace. The first token is parsed as an
    integer id and each remaining token is an item that occurs under that id.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each retained item to a two-element list
        ``[ids, score]``: ``ids`` is the set of integer ids the item occurs
        under and ``score`` is ``verify(ids)``. Only items whose score is at
        least the module-level ``per_freq`` are retained.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a line that lists at least one item starts with a
            token that is not an integer.
    """
    vertical_data = {}
    # The context manager closes the file even when a malformed line makes
    # int() raise part-way through.
    with open(filename, 'r') as source:
        for row in source:
            tokens = row.split()
            if len(tokens) < 2:
                # Blank lines and ids without any item contribute nothing.
                continue
            tid = int(tokens[0])
            for item in tokens[1:]:
                vertical_data.setdefault(item, set()).add(tid)

    vertical_dataPF = {}
    for item, ids in vertical_data.items():
        score = verify(ids)
        if score >= per_freq:
            vertical_dataPF[item] = [ids, score]
    return vertical_dataPF
# In[13]:
def verify(tids, period=None):
    """Count adjacent ids (in sorted order) that lie close enough together.

    The ids are sorted and every pair of neighbours is inspected. A pair is
    counted when the later id minus the earlier id is at most ``period``.

    Args:
        tids: Iterable of numeric ids, for example the set of ids stored for
            an item. It is not modified.
        period: Largest gap that still counts. When omitted, the module-level
            ``per`` setting is used, which is what single-argument calls
            rely on.

    Returns:
        The number of neighbouring pairs whose gap does not exceed
        ``period``. This is 0 when fewer than two ids are given.
    """
    if period is None:
        period = per
    ordered = sorted(tids)
    # Zipping the sorted list with itself shifted by one yields each pair of
    # neighbours exactly once, and nothing at all for 0 or 1 ids.
    return sum(
        1
        for earlier, later in zip(ordered, ordered[1:])
        if later - earlier <= period
    )
# In[11]
if __name__ == "__main__":
    # Positional arguments used in this section:
    #   3 = transaction file, 4 = neighbourhood file, 5 = output file.
    data = Read_Data(sys.argv[3])

    # Each neighbourhood line is "<item> <neighbour> <neighbour> ...".
    # Only items that survived Read_Data() are kept on either side.
    with open(sys.argv[4], 'r') as nbh:
        for line in nbh:
            li = line.split()
            if not li:
                # A blank line has no leading item; indexing li[0] would
                # raise IndexError.
                continue
            if li[0] in data:
                nbh_dict[li[0]] = {name for name in li[1:] if name in data}

    # Items without a neighbourhood line get an empty set so that eclat()
    # can look every item up in nbh_dict.
    for item in data:
        nbh_dict.setdefault(item, set())
    init_nbh = set(data)

    print('finished reading data..... \n Starting mining .....')
    patterns = eclat(
        [],
        sorted(data.items(), key=lambda item: item[1][1], reverse=False),
        init_nbh,
    )
    # eclat() is a generator, so mining happens while the results are
    # written out one pattern per line.
    with open(sys.argv[5], 'w') as f:
        for x in patterns:
            f.write('%s \n' % str(x))