-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenv.py
More file actions
86 lines (74 loc) · 3.77 KB
/
env.py
File metadata and controls
86 lines (74 loc) · 3.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import numpy as np
import pandas as pd
# Class defined to create our test environment
# Purpose: To define the structure of the environment and actions possible
# To get / set the status of the environment based on the Agent's movement
class ENV:
    """A 4x4 grid-world test environment for tabular RL agents.

    Grid layout (row, col):
        'A' agent, starts at (0, 0)
        'O' holes at (1, 2), (2, 3), (3, 0) -- stepping in ends the episode, reward 0
        'G' goal at (3, 3) -- reaching it ends the episode, reward 1
        '-' empty cell
    """
    def __init__(self):
        self.state_cnt = 16 #used only for printing purpose
        self.state_row_cnt = 4 # grid height (number of rows)
        self.state_col_cnt = 4 # grid width (number of columns) -- keep movement bounds independent of action count
        self.action_cnt = 4
        self.actions= ['UP','DOWN','LEFT','RIGHT']#four possible actions in this environment
        self.states = np.full((4,4),'-')#individual states represented as - in the environment
        self.states[0][0]='A'#Agent
        self.states[1][2]='O'#holes
        self.states[2][3]='O'
        self.states[3][0]='O'
        self.states[3][3]='G'#Goal or destination to reach
        self.A_in_row = 0 # Agent's init position at [0,0]
        self.A_in_col = 0
    # used while creating a Q class to know what all possible actions are offered by this environment
    def getActionItems(self):
        """Return (number of actions, list of action names)."""
        return (self.action_cnt, self.actions)
    #used mainly to test the agent that was learnt already
    def getAgentPosition(self):
        """Return the agent's current (row, col) position."""
        return (self.A_in_row, self.A_in_col)
    #just for debugging purpose
    def display_env(self):
        """Print a human-readable dump of the environment state (debug aid)."""
        print("Number of states : {}".format(self.state_cnt))
        print("Number of actions : {}".format(self.action_cnt))
        print("Action list : {}".format(self.actions))
        print("Agent's current position :[{},{}]".format(self.A_in_row,self.A_in_col))
        print("Environment dump : \n{}\n".format(pd.DataFrame(self.states).to_string(index=False,header=False)))
    #to check if the agent has reached the destination or fell into any of the three holes in the environment
    def isDone(self,stateR,stateC):
        """Return True iff (stateR, stateC) is terminal: one of the three holes or the goal."""
        done = False
        if(((stateR == 1) and (stateC == 2)) or
           ((stateR == 2) and (stateC == 3)) or
           ((stateR == 3) and (stateC == 0)) or
           ((stateR == 3) and (stateC == 3))):
            done = True
        return done
    #used for display purpose
    def render(self):
        """Return (grid as a printable string, agent (row, col)) for display."""
        return ("{}\n".format(pd.DataFrame(self.states).to_string(index=False,header=False)),(self.A_in_row,self.A_in_col))
    #function: step taken by the agent. One of the four actions would be input to this function.
    #Function would update the environment's state based on the input action and returns the next state,the reward received by the agent and
    #status info if the Agent has reached the destination/ fallen in the hole ('done' variable = true)
    def step(self,action):
        """Apply one action ('UP'/'DOWN'/'LEFT'/'RIGHT') and advance the environment.

        Moves are clamped at the grid edges (stepping off the board is a no-op
        in that direction). Returns (next_state, reward, done) where next_state
        is the agent's new (row, col), reward is 1 only on reaching the goal,
        and done is True at the goal or any hole.
        """
        done = False
        R = 0
        prev_A_in_row = self.A_in_row
        prev_A_in_col = self.A_in_col
        if (action == 'UP'):
            self.A_in_row = max(self.A_in_row -1,0)
        if (action == 'DOWN'):
            self.A_in_row = min(self.A_in_row +1,self.state_row_cnt-1)
        if (action == 'LEFT'):
            self.A_in_col = max(self.A_in_col -1,0)
        if (action == 'RIGHT'):
            # BUGFIX: clamp against the grid width, not the action count --
            # the original used action_cnt-1, which only worked because both are 4.
            self.A_in_col = min(self.A_in_col +1,self.state_col_cnt-1)
        if (self.isDone( self.A_in_row, self.A_in_col) == False):
            self.states[prev_A_in_row][prev_A_in_col]='-'
            self.states[self.A_in_row][self.A_in_col]='A'
        else:
            done = True
            if ((self.A_in_row == 3) and (self.A_in_col == 3)):#Target reached. Add reward = 1
                self.states[prev_A_in_row][prev_A_in_col]='-'
                self.states[self.A_in_row][self.A_in_col]='A'
                print('Target reached')
                R = 1
            else:
                print('fallen in the hole')
        next_state = (self.A_in_row,self.A_in_col)
        return(next_state,R,done)