-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcalculate_feature_errors.cpp
More file actions
138 lines (133 loc) · 4.5 KB
/
calculate_feature_errors.cpp
File metadata and controls
138 lines (133 loc) · 4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#include <stdlib.h>

#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
// Ratings file read line by line in calculate_errors().
#define TRAINING_FILE "all_um.dta"
// Text file parsed into the user feature matrix by load_feature_matrices().
#define USER_FEATURE_FILE "user_features.dta"
// Text file parsed into the movie feature matrix by load_feature_matrices().
#define MOVIE_FEATURE_FILE "movie_features.dta"
// File that calculate_errors() writes the per-rating absolute errors to.
#define OUTPUT_FILE "feature_errors.dta"
// Number of rows allocated for the user feature matrix.
#define NUM_USERS 458293
// Number of rows allocated for the movie feature matrix.
#define NUM_MOVIES 17770
// Not referenced by the code in this file; presumably the number of lines in
// TRAINING_FILE -- TODO confirm.
#define NUM_TRAINING 102416306
// Not referenced by the code in this file; presumably the size of a held-out
// test set -- TODO confirm.
#define NUM_TESTING 2749898
// Number of values stored per row in both feature matrices (row stride).
#define NUM_COMPONENTS 40
// Parses one space-delimited text file into a row-major matrix.
//
// Line r of the file fills features[r * NUM_COMPONENTS + c] for
// c in [0, NUM_COMPONENTS); extra tokens on a line are ignored and an empty
// line still consumes a row. At most num_rows lines are read so that a file
// longer than expected can never write past the end of `features`.
//
//   path      file to read
//   features  destination buffer holding num_rows * NUM_COMPONENTS doubles
//   num_rows  number of rows the destination buffer has room for
//
// If the file cannot be opened a message is printed to stdout and the buffer
// is left untouched.
static void read_feature_file(const char * path, double * features, int num_rows) {
    std::ifstream feature_file(path);
    if (!feature_file.is_open()) {
        std::cout << "Unable to open file " << path << std::endl;
        return;
    }

    std::string line;
    int row = 0;
    // Looping on getline's result (rather than on good()) avoids processing a
    // phantom line after the last successful read.
    while (row < num_rows && std::getline(feature_file, line)) {
        if (!line.empty()) {
            int col = 0;
            // strtok splits in place, so tokenize the string's own buffer
            // instead of allocating and copying a scratch array per line.
            char * token = std::strtok(&line[0], " ");
            while (token != NULL && col < NUM_COMPONENTS) {
                features[row * NUM_COMPONENTS + col] = std::atof(token);
                col++;
                token = std::strtok(NULL, " ");
            }
        }
        row++;
    }
    // feature_file is closed by its destructor.
}

// Allocates the user and movie feature matrices and fills them from
// USER_FEATURE_FILE and MOVIE_FEATURE_FILE.
//
// Returns a new[]-allocated array of two pointers:
//   [0] user features,  NUM_USERS  * NUM_COMPONENTS doubles, row-major
//   [1] movie features, NUM_MOVIES * NUM_COMPONENTS doubles, row-major
// The caller owns all three allocations and must release each with delete[].
//
// Both matrices are zero-initialized, so entries that the files do not supply
// (missing file, short file, short line) read as 0.0 instead of garbage.
double ** load_feature_matrices() {
    double ** pointers = new double*[2];
    pointers[0] = new double[NUM_USERS * NUM_COMPONENTS]();
    pointers[1] = new double[NUM_MOVIES * NUM_COMPONENTS]();

    read_feature_file(USER_FEATURE_FILE, pointers[0], NUM_USERS);
    read_feature_file(MOVIE_FEATURE_FILE, pointers[1], NUM_MOVIES);

    return pointers;
}
void calculate_errors(double ** feature_pointers) {
int i = 0;
int j = 0;
int k, user, movie;
double total_error = 0.0;
double error, predicted_rating;
string line;
char * p, * line_copy;
double * user_features = feature_pointers[0];
double * movie_features = feature_pointers[1];
ifstream training_file(TRAINING_FILE);
ofstream output_file(OUTPUT_FILE);
if (training_file.is_open() && output_file.is_open()) {
while (training_file.good()) {
getline(training_file, line);
line_copy = new char[line.size() + 1];
strcpy(line_copy, line.c_str());
p = strtok(line_copy, " ");
while (p != NULL) {
if (j == 0) {
user = atoi(p);
}
else if (j == 1) {
movie = atoi(p);
}
else if (j == 3) {
predicted_rating = 0.0;
for (k = 0; k < NUM_COMPONENTS; k++) {
predicted_rating += user_features[user * NUM_COMPONENTS + k] * movie_features[movie * NUM_COMPONENTS + k];
}
error = (atof(p) - predicted_rating);
if (error < 0) {
error = -error;
}
output_file << error << '\n' << endl;
total_error += error;
}
j++;
p = strtok(NULL, " ");
}
j = 0;
delete[] line_copy;
if (i % 1000000 == 0) {
cout << i << " rating errors calculated." << endl;
}
i++;
}
cout << "Total error: " << total_error << endl;
training_file.close();
output_file.close();
}
else cout << "Unable to open fils." << endl;
}
// Entry point: loads the user and movie feature matrices, writes the
// per-rating prediction errors, then releases the matrices.
int main() {
    std::cout << "Loading feature matrices..." << std::endl;
    // load_feature_matrices() allocates the pointer array itself, so nothing
    // is allocated here beforehand (doing so would leak that allocation).
    double ** feature_pointers = load_feature_matrices();
    std::cout << "Feature matrices loaded." << std::endl;

    std::cout << "Writing errors between predicted and actual values..." << std::endl;
    calculate_errors(feature_pointers);
    std::cout << "Done." << std::endl;

    // Free the two matrices as well as the array that holds their pointers.
    delete[] feature_pointers[0];
    delete[] feature_pointers[1];
    delete[] feature_pointers;
    return 0;
}