# cd2.py -- 63 lines (60 loc), 2.04 KB
# (Web-page navigation text and the 1-63 line-number gutter that preceded the
# code in this capture were extraction residue, not part of the program.)
import csv
import random
import math
def loadDataset(filename, trainingSet=None):
    """Read every row of a CSV file and append it to ``trainingSet``.

    Rows are appended in file order exactly as ``csv.reader`` yields them:
    each row is a list of strings and no numeric conversion happens here.

    Args:
        filename: Path of the CSV file to read.
        trainingSet: List that receives the rows; it is extended in place.
            When omitted, a fresh list is created for this call, so rows do
            not accumulate in a default list shared between calls.

    Returns:
        The list the rows were appended to (``trainingSet`` itself when one
        was supplied).

    Raises:
        OSError: If the file cannot be opened.
    """
    if trainingSet is None:
        trainingSet = []
    # newline='' lets the csv module do its own line-ending handling, which
    # it needs in order to parse newlines embedded in quoted fields.
    with open(filename, 'r', newline='') as csvfile:
        # Read the whole file before touching trainingSet so a parse error
        # part-way through does not leave it partially extended.
        rows = list(csv.reader(csvfile))
    trainingSet.extend(rows)
    return trainingSet
def cosineDistance(trainingSet=None, j=0, k=0):
    """Return the cosine distance between columns ``j`` and ``k``.

    Each row of ``trainingSet`` contributes one component to each of the two
    column vectors; cells are converted with ``int``. The result is
    ``1 - dot(a, b) / (|a| * |b|)`` rounded to two decimal places.

    Args:
        trainingSet: Sequence of rows, each indexable at ``j`` and ``k`` with
            values accepted by ``int``. ``None`` (the default) is treated as
            an empty sequence.
        j: Index of the first column.
        k: Index of the second column.

    Returns:
        The rounded distance. When either column has zero magnitude (all
        zeros, or there are no rows) the cosine is undefined; ``1.0`` is
        returned in that case rather than raising ``ZeroDivisionError``.

    Raises:
        IndexError: If a row has no element at ``j`` or ``k``.
        ValueError: If a cell cannot be converted with ``int``.
    """
    rows = trainingSet if trainingSet is not None else ()
    dot = 0
    norm_j_sq = 0
    norm_k_sq = 0
    for row in rows:
        # Convert each cell once and reuse it for the dot product and norm.
        a = int(row[j])
        b = int(row[k])
        dot += a * b
        norm_j_sq += a * a
        norm_k_sq += b * b
    den = (norm_j_sq * norm_k_sq) ** 0.5
    if den == 0:
        # Zero-magnitude column: report the same distance as for orthogonal
        # columns (similarity 0) instead of dividing by zero.
        return 1.0
    return round(1 - (dot / den), 2)
def main():
    """Evaluate a neighbour-based rating predictor on 'pdf_testSet.csv'.

    For every cell in the first ``users`` rows and first ``movies`` columns
    whose value is non-zero, a prediction is formed as the mean of the same
    row's values in the "neighbour" columns: those whose cosine distance to
    the target column (computed over all loaded rows) is at most ``delta``.
    The mean absolute error and root-mean-square error of those predictions
    are printed.

    Both error sums are divided by ``users * movies``, i.e. by every cell in
    the window, including the zero cells that were skipped.
    """
    trainingSet = []
    loadDataset('pdf_testSet.csv', trainingSet)
    delta = 0.6  # Threshold: maximum cosine distance for a neighbour column
    users = 5
    movies = 6
    # Column used when a target column has no neighbours. The original code
    # read this through the inner loop variable after the loop had finished,
    # which is always the last column.
    # NOTE(review): confirm the last column is really the intended fallback.
    last_movie = movies - 1

    # Neighbour columns depend only on the target column, not on the user,
    # so compute them once per column (on first use) instead of per cell.
    neighbours_by_movie = {}
    abs_error_sum = 0
    sq_error_sum = 0
    for i in range(users):
        for j in range(movies):
            actual = int(trainingSet[i][j])
            if actual == 0:
                continue
            if j not in neighbours_by_movie:
                neighbours_by_movie[j] = [
                    k for k in range(movies)
                    if k != j
                    and round(cosineDistance(trainingSet, j, k), 2) <= delta
                ]
            neighbours = neighbours_by_movie[j]
            if neighbours:
                # Zero-valued neighbour cells are included in the mean.
                neighbour_total = sum(int(trainingSet[i][k]) for k in neighbours)
                prediction = neighbour_total / len(neighbours)
            else:
                prediction = int(trainingSet[i][last_movie])
            error = prediction - actual
            abs_error_sum = abs_error_sum + abs(error)
            sq_error_sum = sq_error_sum + pow(error, 2)

    # NOTE(review): the denominator counts skipped (zero) cells too; confirm
    # this is intended rather than the number of predictions made.
    cell_count = users * movies
    mae = abs_error_sum / cell_count
    rmse = pow(sq_error_sum / cell_count, 0.5)
    print('MAE =' + repr(mae))
    print('RMSE =' + repr(rmse))
# Run the evaluation only when this file is executed as a script, so that
# importing the module does not read the CSV file or print metrics.
if __name__ == "__main__":
    main()