-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmodel_preprocess.py
More file actions
95 lines (68 loc) · 2.51 KB
/
model_preprocess.py
File metadata and controls
95 lines (68 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from secrets import CHAMPIONS
import os
import pickle as pkl
import json
import pandas as pd
import numpy as np
# Root directory that is walked for per-game participant JSON files.
maindir = "/mnt/c/Users/sydmb/Documents/stackbattles/participant_jsons"

# One indicator column per champion per side. Keys are "<champion>_100" for the
# blue side (teamId 100) and "<champion>_200" for the red side (teamId 200);
# each value is a list that receives one 0/1 entry per game.
blue_cols = {name + "_100": [] for name in CHAMPIONS}
red_cols = {name + "_200": [] for name in CHAMPIONS}

# Dependent variable: one entry per game, the win flag of teamId 100.
wins = []

# Running totals of champion indicators set to 1 on each side, plus the number
# of participants (out of an expected ten per game) that matched no column.
red_count = blue_count = err_count = 0
# Walk every directory under maindir and one-hot encode each game file found.
for subdir, dirs, files in os.walk(maindir):
    for f in files:
        # json.load() does not accept an `encoding` keyword on Python 3.9+
        # (passing one raises TypeError), so the latin-1 decoding the original
        # call requested is applied when the file is opened instead.
        with open(os.path.join(subdir, f), encoding="latin-1") as json_file:
            p_json = json.load(json_file)

        # Collect "<championName>_<teamId>" for every participant in the game.
        # A set is enough because it is only used for membership tests below.
        c_ingame = set()
        # Reset per game so a label can never leak over from the previous file.
        win = None
        for participant in p_json:
            c_ingame.add(
                "{champion}_{teamId}".format(
                    champion=participant["championName"],
                    teamId=str(participant["teamId"]),
                )
            )
            if participant["teamId"] == 100:
                win = int(participant["win"])

        if win is None:
            # No teamId-100 participant means there is no label for this game.
            # Skip it (keeping every column and `wins` the same length) and
            # count all ten expected participants as not appended.
            err_count += 10
            continue

        # Append 1 for champions that were in the game and 0 for the rest,
        # counting how many indicators were set on each side.
        b = 0
        for key, column in blue_cols.items():
            present = int(key in c_ingame)
            column.append(present)
            b += present

        r = 0
        for key, column in red_cols.items():
            present = int(key in c_ingame)
            column.append(present)
            r += present

        # Update the dependent variable.
        wins.append(win)

        # Update the running totals; a full game is assumed to contribute ten
        # matched champions, so any shortfall is recorded as errors.
        red_count += r
        blue_count += b
        err_count += 10 - r - b
# Report the totals accumulated while reading the game files.
print("SUCCESS")
print(f"red champions appended: {red_count}")
print(f"blue champions appended: {blue_count}")
print(f"encountered {err_count} errors")
print(f"{len(wins)} games seen")

# Build one frame per side, join them column-wise (red columns first), and
# attach the dependent variable as the "wins" column.
red = pd.DataFrame.from_dict(red_cols)
blue = pd.DataFrame.from_dict(blue_cols)
data = pd.concat([red, blue], axis=1)
data["wins"] = wins

# Print summary statistics for the combined frame, then for each side.
for frame in (data, red, blue):
    print(frame.describe())

# Persist the combined frame as a pickle in the current working directory.
with open("./model_data.pkl", "wb") as out_file:
    pkl.dump(data, out_file)
#iterate through each data pull directory and read in the data
#format desired data as a df
#separate dependent and independent variables
#train test split for the data