Real Multiplexer

Norbert Kozlowski · Norbert Kozlowski · commit 5a3d6f49d7c5 · 2018-06-26T15:27:39.000+02:00
diff --git a/gym_multiplexer/__init__.py b/gym_multiplexer/__init__.py
@@ -1,44 +1,61 @@
-from .boolean_multiplexer import BooleanMultiplexer
-
 from gym.envs.registration import register
 
-name = "boolean-multiplexer"
+from .boolean_multiplexer import BooleanMultiplexer
+from .real_multiplexer import RealMultiplexer
+
+bool_mpx_name = "boolean-multiplexer"
+real_mpx_name = "real-multiplexer"
 max_episode_steps = 1
 
 # Length of a multiplexer is calculated
 # using l = k + 2^k
 
 register(
-    id='{}-3bit-v0'.format(name),
+    id='{}-3bit-v0'.format(bool_mpx_name),
     entry_point='gym_multiplexer:BooleanMultiplexer',
     max_episode_steps=max_episode_steps,
     kwargs={'control_bits': 1}
 )
 
 register(
-    id='{}-6bit-v0'.format(name),
+    id='{}-6bit-v0'.format(bool_mpx_name),
     entry_point='gym_multiplexer:BooleanMultiplexer',
     max_episode_steps=max_episode_steps,
     kwargs={'control_bits': 2}
 )
 
 register(
-    id='{}-11bit-v0'.format(name),
+    id='{}-11bit-v0'.format(bool_mpx_name),
     entry_point='gym_multiplexer:BooleanMultiplexer',
     max_episode_steps=max_episode_steps,
     kwargs={'control_bits': 3}
 )
 
 register(
-    id='{}-20bit-v0'.format(name),
+    id='{}-20bit-v0'.format(bool_mpx_name),
     entry_point='gym_multiplexer:BooleanMultiplexer',
     max_episode_steps=max_episode_steps,
     kwargs={'control_bits': 4}
 )
 
 register(
-    id='{}-37bit-v0'.format(name),
+    id='{}-37bit-v0'.format(bool_mpx_name),
     entry_point='gym_multiplexer:BooleanMultiplexer',
     max_episode_steps=max_episode_steps,
     kwargs={'control_bits': 5}
+)
+
+# Real-valued variants: only the 3-bit and 6-bit sizes are registered.
+register(
+    id='{}-3bit-v0'.format(real_mpx_name),
+    entry_point='gym_multiplexer:RealMultiplexer',
+    max_episode_steps=max_episode_steps,
+    kwargs={'control_bits': 1}
+)
+
+register(
+    id='{}-6bit-v0'.format(real_mpx_name),
+    entry_point='gym_multiplexer:RealMultiplexer',
+    max_episode_steps=max_episode_steps,
+    kwargs={'control_bits': 2}
 )
diff --git a/gym_multiplexer/boolean_multiplexer.py b/gym_multiplexer/boolean_multiplexer.py
@@ -1,58 +1,28 @@
-import logging
-import random
-
-import gym
-from bitstring import BitArray
 from gym.spaces import Discrete
 
+from .multiplexer import Multiplexer
 
-class BooleanMultiplexer(gym.Env):
 
-    REWARD = 1000
+class BooleanMultiplexer(Multiplexer):
+    """Multiplexer whose observed attributes are the integers 0 and 1."""
 
     def __init__(self, control_bits=3) -> None:
-        self.control_bits = control_bits
-        self.metadata = {'render.modes': ['human']}
+        super().__init__(control_bits)
         self.observation_space = Discrete(self._observation_string_length)
         self.action_space = Discrete(2)
 
-    def _reset(self):
-        logging.debug("Resetting the environment")
-        bits = BitArray([random.randint(0, 1) for _ in
-                         range(0, self._observation_string_length - 1)])
-
-        self._ctrl_bits = bits[:self.control_bits]
-        self._data_bits = bits[self.control_bits:]
-        self._validation_bit = False
-
-        return self._observation()
-
-    def _step(self, action):
-        reward = 0
-
-        if action == self._answer:
-            self._validation_bit = True
-            reward = self.REWARD
-
-        return self._observation(), reward, None, None
-
-    def _render(self, mode='human', close=False):
-        if close:
-            return
-
-        if mode == 'human':
-            return self._observation()
-        else:
-            super(BooleanMultiplexer, self).render(mode=mode)
-
-    def _observation(self) -> str:
-        return (self._ctrl_bits + self._data_bits
-                + BitArray([self._validation_bit])).bin
-
-    @property
-    def _observation_string_length(self):
-        return self.control_bits + pow(2, self.control_bits) + 1
-
-    @property
-    def _answer(self):
-        return int(self._data_bits[self._ctrl_bits.uint])
+    @property
+    def _observation(self) -> list:
+        """Return the rounded state followed by the validation bit.
+
+        The inherited property exposes the raw ``random.random()`` draws,
+        which would hand real numbers to the agent of a boolean
+        multiplexer.
+        """
+        observation = list(self._internal_state())
+        observation.append(self._validation_bit)
+        return observation
+
+    def _internal_state(self):
+        """Return an iterator over the state values rounded to 0 or 1."""
+        return map(round, self._state)
diff --git a/gym_multiplexer/multiplexer.py b/gym_multiplexer/multiplexer.py
@@ -0,0 +1,87 @@
+import random
+
+import gym
+
+from .utils import get_correct_answer
+
+
+class Multiplexer(gym.Env):
+    """Base multiplexer environment with a randomly drawn state.
+
+    The state holds ``control_bits + 2 ** control_bits`` values drawn with
+    ``random.random()``; an observation is that state followed by a
+    validation bit which becomes 1 once the correct action was given.
+    Subclasses implement ``_internal_state`` to reduce the state to the
+    bits from which the correct answer is computed.
+    """
+
+    # Reward granted for the correct action.
+    REWARD = 1000
+
+    def __init__(self, control_bits=3) -> None:
+        self.control_bits = control_bits
+        self.metadata = {'render.modes': ['human']}
+
+        self._state = None
+        self._validation_bit = 0
+
+    def _internal_state(self):
+        """Return the state as an iterable of bits (subclass hook)."""
+        raise NotImplementedError
+
+    def _reset(self):
+        """Draw a new random state and return the first observation."""
+        self._state = [random.random() for _ in
+                       range(self._observation_string_length - 1)]
+        self._validation_bit = 0
+        return self._observation
+
+    def _step(self, action):
+        """Score ``action``; return ``(observation, reward, done, info)``.
+
+        ``done`` is always ``True`` (the environments are registered with
+        a one-step episode limit) and ``info`` is an empty dict, so
+        wrappers that expect a bool and a mapping keep working.
+        """
+        reward = 0
+
+        if action == self._correct_answer:
+            self._validation_bit = 1
+            reward = self.REWARD
+
+        return self._observation, reward, True, {}
+
+    def _render(self, mode='human', close=False):
+        """Return the current observation for the ``human`` mode.
+
+        Raises:
+            gym.error.UnsupportedMode: for any other mode.  Delegating to
+                ``self.render`` instead could dispatch straight back into
+                this method and never terminate.
+        """
+        if close:
+            return None
+
+        if mode == 'human':
+            return self._observation
+
+        raise gym.error.UnsupportedMode(
+            'Unsupported rendering mode: {}'.format(mode))
+
+    @property
+    def _observation(self) -> list:
+        """Return a new list: the state values plus the validation bit."""
+        observation = list(self._state)
+        observation.append(self._validation_bit)
+        return observation
+
+    @property
+    def _correct_answer(self):
+        """Return the data bit addressed by the control bits."""
+        return get_correct_answer(list(self._internal_state()),
+                                  self.control_bits)
+
+    @property
+    def _observation_string_length(self):
+        """Return the observation length: state size plus validation bit."""
+        return self.control_bits + 2 ** self.control_bits + 1
diff --git a/gym_multiplexer/real_multiplexer.py b/gym_multiplexer/real_multiplexer.py
@@ -0,0 +1,20 @@
+from gym.spaces import Box, Discrete
+
+from .multiplexer import Multiplexer
+
+
+class RealMultiplexer(Multiplexer):
+    """Multiplexer observed as real values; a state value counts as a set
+    bit when it is strictly greater than ``threshold``."""
+
+    def __init__(self, control_bits=3, threshold=.5) -> None:
+        super().__init__(control_bits)
+        self.threshold = threshold
+
+        observation_shape = (self._observation_string_length,)
+        self.observation_space = Box(low=0, high=1, shape=observation_shape)
+        self.action_space = Discrete(2)
+
+    def _internal_state(self):
+        """Lazily yield ``value > threshold`` for every state value."""
+        return (value > self.threshold for value in self._state)
diff --git a/gym_multiplexer/tests/test_boolean_multiplexer.py b/gym_multiplexer/tests/test_boolean_multiplexer.py
@@ -10,7 +10,7 @@
 logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
 
 
-class TestMultiplexer:
+class TestBooleanMultiplexer:
     def test_should_initialize_multiplexer(self):
         # when
         mp = gym.make('boolean-multiplexer-6bit-v0')
@@ -29,9 +29,9 @@ def test_should_return_observation_when_reset(self):
 
         # then
         assert state is not None
-        assert state[-1] == '0'
+        assert state[-1] == 0
         assert 7 == len(state)
-        assert type(state) is str
+        assert type(state) is list
 
     def test_should_render_state(self):
         # given
@@ -43,9 +43,9 @@ def test_should_render_state(self):
 
         # then
         assert state is not None
-        assert state[-1] == '0'
+        assert state[-1] == 0
         assert 4 == len(state)
-        assert type(state) is str
+        assert type(state) is list
 
     def test_should_execute_step(self):
         # given
@@ -58,8 +58,8 @@ def test_should_execute_step(self):
 
         # then
         assert state is not None
-        assert state[-1] in ['0', '1']
-        assert type(state) is str
+        assert state[-1] in [0, 1]
+        assert type(state) is list
         assert reward in [0, 1000]
         assert done is True
 
diff --git a/gym_multiplexer/tests/test_real_multiplexer.py b/gym_multiplexer/tests/test_real_multiplexer.py
@@ -0,0 +1,70 @@
+import logging
+import random
+import sys
+
+import gym
+
+# noinspection PyUnresolvedReferences
+import gym_multiplexer
+
+logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
+
+
+class TestRealMultiplexer:
+
+    def test_should_initialize_real_mpx(self):
+        # when
+        mp = gym.make("real-multiplexer-6bit-v0")
+
+        # then
+        assert mp is not None
+        assert (7,) == mp.observation_space.shape
+        assert 2 == mp.action_space.n
+
+    def test_should_return_observation_when_reset(self):
+        # given
+        mp = gym.make('real-multiplexer-6bit-v0')
+
+        # when
+        state = mp.reset()
+
+        # then
+        assert state is not None
+        assert state[-1] == 0
+        assert 7 == len(state)
+        assert type(state) is list
+
+    def test_should_execute_step(self):
+        # given
+        mp = gym.make('real-multiplexer-6bit-v0')
+        mp.reset()
+        action = self._random_action()
+
+        # when
+        state, reward, done, _ = mp.step(action)
+
+        # then
+        assert state is not None
+        assert state[-1] in [0, 1]
+        assert type(state) is list
+        assert reward in [0, 1000]
+        assert done is True
+
+    def test_execute_multiple_steps_and_keep_constant_perception_length(self):
+        # given
+        mp = gym.make('real-multiplexer-6bit-v0')
+        steps = 100
+
+        # when & then
+        for _ in range(steps):
+            p0 = mp.reset()
+            assert 7 == len(p0)
+
+            action = self._random_action()
+            p1, *_rest = mp.step(action)
+            assert 7 == len(p1)
+
+    @staticmethod
+    def _random_action():
+        """Return 0 or 1, chosen uniformly at random."""
+        return random.choice([0, 1])
diff --git a/gym_multiplexer/tests/test_utils.py b/gym_multiplexer/tests/test_utils.py
@@ -4,11 +4,11 @@
 class TestUtils:
 
     def test_should_calculate_correct_answer_for_3bit_multiplexer(self):
-        assert 1 == get_correct_answer('0100', 1)
-        assert 0 == get_correct_answer('1100', 1)
+        assert 1 == get_correct_answer([0, 1, 0, 0], 1)
+        assert 0 == get_correct_answer([1, 1, 0, 0], 1)
 
     def test_should_calculate_correct_answer_for_6bit_multiplexer(self):
-        assert 0 == get_correct_answer('1101000', 2)
+        assert 0 == get_correct_answer([1, 1, 0, 1, 0, 0, 0], 2)
 
     def test_should_calculate_correct_answer_for_11bit_multiplexer(self):
-        assert 1 == get_correct_answer('1011011010', 3)
+        assert 1 == get_correct_answer([1, 0, 1, 1, 0, 1, 1, 0, 1, 0], 3)
diff --git a/gym_multiplexer/utils/utils.py b/gym_multiplexer/utils/utils.py
@@ -1,11 +1,18 @@
 from bitstring import BitString
 
 
-def get_correct_answer(bitstring: str, control_bits: int) -> int:
-    bits = BitString(bin=bitstring)
+def get_correct_answer(bitstring: list, control_bits: int) -> int:
+    """Return the data bit addressed by the control bits.
+
+    The first ``control_bits`` items of ``bitstring`` are read as an
+    unsigned integer index into the remaining items; data items other
+    than the addressed one do not affect the result.
+    NOTE(review): the list is handed to ``BitString`` unchanged, so how an
+    item becomes a bit is up to that library - confirm for non-bool input.
+    """
+    bits = BitString(bitstring)
 
     _ctrl_bits = bits[:control_bits]
     _data_bits = bits[control_bits:]
-    _validation_bit = bits[-1]
 
     return int(_data_bits[_ctrl_bits.uint])
diff --git a/setup.py b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages
 
 setup(name='parrotprediction-openai-envs',
-      version='1.0.0',
+      version='2.0.0',
       description='Custom environments for OpenAI Gym',
       keywords='acs lcs machine-learning reinforcement-learning openai',
       url='https://github.com/ParrotPrediction/openai-envs',