Skip to content

Commit c61ec17

Browse files
author
Norbert Kozlowski
committed
Boolean Multiplexer environment
1 parent 719ec07 commit c61ec17

File tree

13 files changed

+187
-60
lines changed

13 files changed

+187
-60
lines changed

README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1-
# openai-maze-envs
1+
# Parrot Prediction OpenAI environments
2+
3+
## Maze
24

35
Initializing
46

57
maze = gym.make('MazeF1-v0')
68

79
Getting all possible transitions
810

9-
transitions = maze.env.get_all_possible_transitions()
11+
transitions = maze.env.get_all_possible_transitions()
12+
13+
## Boolean Multiplexer
14+
Read the blog [post](https://medium.com/parrot-prediction/boolean-multiplexer-in-practice-94e3236821b5) describing its usage.

ex.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

examples/maze.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import logging
2+
from random import choice
3+
4+
import gym
5+
6+
# noinspection PyUnresolvedReferences
7+
import gym_maze
8+
9+
logging.basicConfig(level=logging.DEBUG)
10+
11+
if __name__ == '__main__':
    # Example driver: a single episode of uniformly random moves in the
    # registered 'MazeF1-v0' environment, logging every step.
    env = gym.make('MazeF1-v0')

    # Action ids 0..7 are sampled uniformly at each step.
    actions = list(range(8))
    # Queried only to demonstrate the call; the random walk does not use it.
    transitions = env.env.get_all_possible_transitions()

    for i_episode in range(1):
        observation = env.reset()

        for step_no in range(100):
            logging.info(
                "Time: [{}], observation: [{}]".format(step_no, observation))

            action = choice(actions)
            logging.info("\t\tExecuted action: [{}]".format(action))

            observation, reward, done, info = env.step(action)
            if not done:
                continue

            # Episode is over: report how long it took and what was earned.
            logging.info(
                "Episode finished after {} timesteps.".format(step_no + 1))
            logging.info("Last reward: {}".format(reward))
            break

    logging.info("Finished")

gym_maze/Maze.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
import logging
2-
3-
logger = logging.getLogger(__name__)
4-
51
PATH_MAPPING = 0
62
WALL_MAPPING = 1
73
REWARD_MAPPING = 9

gym_maze/__init__.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
1-
import logging
2-
31
from gym.envs.registration import register
42

5-
from gym_maze.Maze import Maze
6-
from gym_maze.Maze import PATH_MAPPING, WALL_MAPPING, REWARD_MAPPING
7-
8-
logger = logging.getLogger(__name__)
3+
# noinspection PyUnresolvedReferences
4+
from gym_maze.Maze import Maze, PATH_MAPPING, WALL_MAPPING, REWARD_MAPPING
95

106
ACTION_LOOKUP = {
117
0: 'N',

gym_maze/envs/AbstractMaze.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
1-
import gym
2-
from gym import error, spaces, utils
3-
from gym.utils import seeding
4-
5-
from gym_maze import Maze, WALL_MAPPING, ACTION_LOOKUP
6-
from gym_maze.utils import get_all_possible_transitions
7-
8-
import numpy as np
91
import logging
102
import random
113
import sys
124

5+
import gym
6+
import numpy as np
7+
from gym import spaces, utils
138

14-
logger = logging.getLogger(__name__)
9+
from gym_maze import Maze, WALL_MAPPING, ACTION_LOOKUP
10+
from gym_maze.utils import get_all_possible_transitions
1511

1612
ANIMAT_MARKER = 5
1713

@@ -38,7 +34,7 @@ def _step(self, action):
3834
return observation, reward, episode_over, {}
3935

4036
def _reset(self):
41-
logger.debug("Resetting the environment")
37+
logging.debug("Resetting the environment")
4238
self._insert_animat()
4339
return self._observe()
4440

gym_maze/tests/test_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import gym
2+
# noinspection PyUnresolvedReferences
23
import gym_maze
34

45

gym_multiplexer/__init__.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from .boolean_multiplexer import BooleanMultiplexer
2+
3+
from gym.envs.registration import register
4+
5+
name = "boolean-multiplexer"
max_episode_steps = 1

# Register the multiplexer variants in order of growing size. Each id
# template is paired with the number of control bits handed to the
# environment constructor.
for _id_template, _control_bits in (
        ('{}-3bit-v0', 1),
        ('{}-6bit-v0', 2),
        ('{}-11bit-v0', 3),
):
    register(
        id=_id_template.format(name),
        entry_point='gym_multiplexer:BooleanMultiplexer',
        max_episode_steps=max_episode_steps,
        kwargs={'control_bits': _control_bits},
    )

# Keep the loop temporaries out of the package namespace.
del _id_template, _control_bits
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import logging
2+
import random
3+
4+
import gym
5+
from bitstring import BitArray
6+
from gym.spaces import Discrete
7+
8+
9+
class BooleanMultiplexer(gym.Env):
    """Boolean multiplexer classification task exposed as a gym environment.

    An observation is a bit string made of ``control_bits`` address bits
    followed by ``2 ** control_bits`` data bits. The correct action is the
    value of the data bit selected by the address bits read as an unsigned
    integer.
    """

    def __init__(self, control_bits=3) -> None:
        """Configure the spaces for a multiplexer with ``control_bits``
        address bits.

        No bit string is drawn here; call reset before stepping.
        """
        self.metadata = {'render.modes': ['human']}
        self.control_bits = control_bits
        # NOTE(review): the space is sized by the bit-string length while
        # observations are bit arrays -- confirm this is the intended space.
        self.observation_space = Discrete(len(self._observation_string_length))
        self.action_space = Discrete(2)

    def _reset(self):
        """Draw a fresh random bit string and return it as the observation."""
        logging.debug("Resetting the environment")
        bits = BitArray([random.randint(0, 1) for _ in
                         self._observation_string_length])

        self._ctrl_bits = bits[:self.control_bits]
        self._data_bits = bits[self.control_bits:]

        # Resetting must hand the initial observation back to the caller.
        return self._observation()

    def _step(self, action):
        """Score ``action`` against the multiplexer output.

        Returns a ``(observation, reward, done, info)`` tuple: ``reward`` is
        1 when ``action`` equals the addressed data bit and 0 otherwise,
        ``done`` is always ``True`` because the bit string does not change
        after an answer (the package registers these environments with a
        one-step limit), and ``info`` is an empty dict.
        """
        reward = 1 if action == self._answer else 0
        return self._observation(), reward, True, {}

    def _render(self, mode='human', close=False):
        """Return the current bit string for ``mode='human'``.

        Returns ``None`` when ``close`` is set; any other mode is delegated
        to the base class ``render``.
        """
        if close:
            return None

        if mode == 'human':
            return self._observation()

        return super(BooleanMultiplexer, self).render(mode=mode)

    def _observation(self):
        """Return the control bits followed by the data bits."""
        # Concatenate the two bit arrays; ``self.control_bits`` is only the
        # integer count and cannot be added to a bit array.
        return self._ctrl_bits + self._data_bits

    @property
    def _observation_string_length(self):
        """Range over every bit position of an observation."""
        return range(0, self.control_bits + pow(2, self.control_bits))

    @property
    def _answer(self):
        """Value (0 or 1) of the data bit addressed by the control bits."""
        return int(self._data_bits[self._ctrl_bits.uint])

gym_multiplexer/tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)