Move action handling out of Coordinator.

This change is a rather mechanical one to simplify the implementation of `Coordinator` by moving the logic of preparing the action that's sent to the simulator out of `coordinator.py`.

PiperOrigin-RevId: 687054968
google-deepmind · Oct 17, 2024 · de00444 · de00444
1 parent cb72c19
commit de00444
Show file tree

Hide file tree

Showing 4 changed files with 426 additions and 101 deletions.
diff --git a/android_env/components/action_fns.py b/android_env/components/action_fns.py
@@ -0,0 +1,134 @@
+# coding=utf-8
+# Copyright 2024 DeepMind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Functions to convert actions between different components' formats."""
+
+import socket
+
+from absl import logging
+from android_env.components import action_type as action_type_lib
+from android_env.components import errors
+from android_env.components import pixel_fns
+from android_env.components.simulators import base_simulator
+import numpy as np
+
+
+def send_action_to_simulator(
+    action: dict[str, np.ndarray],
+    simulator: base_simulator.BaseSimulator,
+    screen_width: int,
+    screen_height: int,
+    num_fingers: int,
+) -> bool:
+  """Sends the selected action to the given simulator.
+
+  The simulator will interpret the action according to `action["action_type"]`.
+  The effect this action triggers in the Android OS will be determined by the
+  currently running application.
+
+  TOUCH and LIFT actions are converted to per-finger touch events and passed to
+  `simulator.send_touch()`. KEYDOWN, KEYUP and KEYPRESS actions pass the first
+  element of `action["keycode"]` to `simulator.send_key()`. Any other action
+  type sends nothing to the simulator.
+
+  Args:
+    action: The action to send. It must contain "action_type". Touch actions
+      also need "touch_position" (plus "action_type_<i>" and
+      "touch_position_<i>" for each finger i >= 2); key actions need "keycode".
+    simulator: The simulator that will receive the action.
+    screen_width: The width of the touchscreen in pixels.
+    screen_height: The height of the touchscreen in pixels.
+    num_fingers: The number of fingers used in this simulator.
+
+  Returns:
+    False if sending raised `socket.error` or `errors.SendActionError` (the
+    failure is logged), True otherwise, including when nothing was sent.
+
+  Raises:
+    KeyError: If a key required for the given action type is missing from
+      `action`.
+  """
+
+  try:
+    match action['action_type']:
+      # If the action is a TOUCH or LIFT, send a touch event to the simulator.
+      case action_type_lib.ActionType.TOUCH | action_type_lib.ActionType.LIFT:
+        prepared_action = _prepare_touch_action(
+            action, screen_width, screen_height, num_fingers
+        )
+        simulator.send_touch(prepared_action)
+      # If the action is a key event, send a key event to the simulator.
+      case action_type_lib.ActionType.KEYDOWN:
+        simulator.send_key(action['keycode'].item(0), event_type='keydown')
+      case action_type_lib.ActionType.KEYUP:
+        simulator.send_key(action['keycode'].item(0), event_type='keyup')
+      case action_type_lib.ActionType.KEYPRESS:
+        simulator.send_key(action['keycode'].item(0), event_type='keypress')
+      # There is no default case: other action types fall through silently.
+  # Only these two error types are swallowed; anything else propagates.
+  except (socket.error, errors.SendActionError):
+    logging.exception('Unable to execute action: %r', action)
+    return False
+
+  # Reached on success and for action types that are not handled above.
+  return True
+
+
+def _prepare_touch_action(
+    action: dict[str, np.ndarray],
+    screen_width: int,
+    screen_height: int,
+    num_fingers: int,
+) -> list[tuple[int, int, bool, int]]:
+  """Turns an AndroidEnv action into values that the simulator can interpret.
+
+  Converts float-valued 'touch_position' to integer coordinates corresponding
+  to specific pixels, and 'action_type' to booleans indicating whether the
+  screen is touched at said location or not. The result of this function can
+  be sent directly to the underlying simulator (e.g. the Android Emulator,
+  virtual machine, or a phone).
+
+  Args:
+    action: An action containing 'action_type' and 'touch_position' (and the
+      '_<i>'-suffixed equivalents for each finger i >= 2).
+    screen_width: The width of the touchscreen in pixels.
+    screen_height: The height of the touchscreen in pixels.
+    num_fingers: The number of fingers to extract from `action`.
+
+  Returns:
+    A list with one tuple per finger, each in the format
+    (x: int, y: int, down/up: bool, finger_index: int). `finger_index` starts
+    at 0 for the first finger.
+  """
+
+  touch_events = []
+  for i, finger_action in enumerate(_split_touch_action(action, num_fingers)):
+    # True only for TOUCH; any other action type counts as not touching.
+    is_touch = finger_action['action_type'] == action_type_lib.ActionType.TOUCH
+    touch_position = finger_action['touch_position']
+    touch_pixels = pixel_fns.touch_position_to_pixel_position(
+        touch_position, width_height=(screen_width, screen_height)
+    )
+    touch_events.append((touch_pixels[0], touch_pixels[1], is_touch, i))
+  return touch_events
+
+
+def _split_touch_action(
+    action: dict[str, np.ndarray], num_fingers: int
+) -> list[dict[str, np.ndarray]]:
+  """Splits a multitouch action into a list of single-touch actions.
+
+  Args:
+    action: A touch action. The first finger is read from 'action_type' and
+      'touch_position'; finger i (for i from 2 to `num_fingers`) is read from
+      'action_type_<i>' and 'touch_position_<i>'.
+    num_fingers: The total number of fingers to extract.
+
+  Returns:
+    A list of dicts, each with the keys 'action_type' and 'touch_position'.
+    The first finger comes first, followed by fingers 2..`num_fingers` in
+    order.
+  """
+
+  # The first finger uses the unsuffixed keys and is always included.
+  single_touch_actions = [{
+      'action_type': action['action_type'],
+      'touch_position': action['touch_position'],
+  }]
+  # Additional fingers are numbered from 2 and use suffixed keys.
+  for i in range(2, num_fingers + 1):
+    single_touch_actions.append({
+        'action_type': action[f'action_type_{i}'],
+        'touch_position': action[f'touch_position_{i}'],
+    })
+  return single_touch_actions
+
+
+def lift_all_fingers_action(num_fingers: int) -> dict[str, np.ndarray]:
+  """A lift action with each finger.
+
+  Args:
+    num_fingers: The number of fingers to include in the action.
+
+  Returns:
+    A dict with 'action_type' set to LIFT and 'touch_position' set to [0, 0]
+    for the first finger, plus 'action_type_<i>' and 'touch_position_<i>'
+    entries with the same values for each finger i from 2 to `num_fingers`.
+  """
+
+  # There's always at least one finger.
+  lift_action = {
+      'action_type': np.array(action_type_lib.ActionType.LIFT),
+      'touch_position': np.array([0, 0]),
+  }
+  # Subsequent fingers have separate dict entries.
+  for i in range(2, num_fingers + 1):
+    lift_action |= {
+        f'action_type_{i}': np.array(action_type_lib.ActionType.LIFT),
+        f'touch_position_{i}': np.array([0, 0]),
+    }
+  return lift_action
diff --git a/android_env/components/action_fns_test.py b/android_env/components/action_fns_test.py
@@ -0,0 +1,252 @@
+# coding=utf-8
+# Copyright 2024 DeepMind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import socket
+from unittest import mock
+
+from absl.testing import absltest
+from absl.testing import parameterized
+from android_env.components import action_fns
+from android_env.components import action_type as action_type_lib
+from android_env.components import errors
+from android_env.components.simulators import base_simulator
+import numpy as np
+
+
+class ActionFnsTest(parameterized.TestCase):
+  """Tests for the functions in `action_fns`.
+
+  Every test uses an autospec'd `BaseSimulator` mock, an 800x600 screen, and
+  checks the calls made on the mock and the value returned.
+  """
+
+  def test_send_action_to_simulator_missing_action_type(self):
+    """A `KeyError` should be raised if the action is missing "action_type"."""
+
+    # Arrange.
+    simulator = mock.create_autospec(base_simulator.BaseSimulator)
+    action = {'some_key': np.array(123, np.int32)}
+
+    # Act & Assert.
+    self.assertRaises(
+        KeyError,
+        action_fns.send_action_to_simulator,
+        action,
+        simulator,
+        800,
+        600,
+        1,
+    )
+
+  def test_send_action_to_simulator_socket_error(self):
+    """Returns `False` if the simulator raises a socket error."""
+
+    # Arrange.
+    simulator = mock.create_autospec(base_simulator.BaseSimulator)
+    simulator.send_touch.side_effect = socket.error('not today')
+    action = {
+        'action_type': action_type_lib.ActionType.TOUCH,
+        'touch_position': np.array([0.3, 0.5], np.float32),
+    }
+
+    # Act.
+    output = action_fns.send_action_to_simulator(
+        action,
+        simulator,
+        800,
+        600,
+        1,
+    )
+
+    # Assert.
+    self.assertFalse(output)
+    simulator.send_touch.assert_called_once()
+
+  def test_send_action_to_simulator_sendactionerror(self):
+    """Returns `False` if the simulator raises a SendActionError."""
+
+    # Arrange.
+    simulator = mock.create_autospec(base_simulator.BaseSimulator)
+    simulator.send_touch.side_effect = errors.SendActionError('oops!')
+    action = {
+        'action_type': action_type_lib.ActionType.TOUCH,
+        'touch_position': np.array([0.3, 0.5], np.float32),
+    }
+
+    # Act.
+    output = action_fns.send_action_to_simulator(
+        action,
+        simulator,
+        800,
+        600,
+        1,
+    )
+
+    # Assert.
+    self.assertFalse(output)
+    simulator.send_touch.assert_called_once()
+
+  def test_send_action_to_simulator_touch_success_one_finger(self):
+    """Returns `True` with a proper 1-finger touch action."""
+
+    # Arrange.
+    simulator = mock.create_autospec(base_simulator.BaseSimulator)
+    action = {
+        'action_type': action_type_lib.ActionType.TOUCH,
+        'touch_position': np.array([0.2, 0.5], np.float32),
+    }
+
+    # Act.
+    output = action_fns.send_action_to_simulator(
+        action,
+        simulator,
+        800,
+        600,
+        1,
+    )
+
+    # Assert.
+    # Expected pixels: 0.2 * 800 = 160 and 0.5 * 600 = 300; finger index is 0.
+    self.assertTrue(output)
+    simulator.send_touch.assert_called_once_with(
+        [(np.int32(160), np.int32(300), True, 0)]
+    )
+
+  def test_send_action_to_simulator_touch_success_multiple_finger(self):
+    """Returns `True` with a proper 3-finger touch action."""
+
+    # Arrange.
+    simulator = mock.create_autospec(base_simulator.BaseSimulator)
+    action = {
+        'action_type': action_type_lib.ActionType.TOUCH,
+        'touch_position': np.array([0.2, 0.5], np.float32),
+        'action_type_2': action_type_lib.ActionType.LIFT,
+        'touch_position_2': np.array([0.1, 0.2], np.float32),
+        'action_type_3': action_type_lib.ActionType.TOUCH,
+        'touch_position_3': np.array([0.5, 0.2], np.float32),
+    }
+
+    # Act.
+    output = action_fns.send_action_to_simulator(
+        action,
+        simulator,
+        800,
+        600,
+        3,
+    )
+
+    # Assert.
+    # One tuple per finger, in finger order; the LIFT finger is reported with
+    # `False` and fingers are indexed from 0.
+    self.assertTrue(output)
+    simulator.send_touch.assert_called_once_with([
+        (np.int32(160), np.int32(300), True, 0),
+        (np.int32(80), np.int32(120), False, 1),
+        (np.int32(400), np.int32(120), True, 2),
+    ])
+
+  def test_send_action_to_simulator_keydown_success(self):
+    """Returns `True` with a proper keydown action."""
+
+    # Arrange.
+    simulator = mock.create_autospec(base_simulator.BaseSimulator)
+    action = {
+        'action_type': action_type_lib.ActionType.KEYDOWN,
+        'keycode': np.array([21], np.int32),
+    }
+
+    # Act.
+    output = action_fns.send_action_to_simulator(
+        action,
+        simulator,
+        800,
+        600,
+        1,
+    )
+
+    # Assert.
+    self.assertTrue(output)
+    simulator.send_key.assert_called_once_with(21, event_type='keydown')
+
+  def test_send_action_to_simulator_keyup_success(self):
+    """Returns `True` with a proper keyup action."""
+
+    # Arrange.
+    simulator = mock.create_autospec(base_simulator.BaseSimulator)
+    action = {
+        'action_type': action_type_lib.ActionType.KEYUP,
+        'keycode': np.array([42], np.int32),
+    }
+
+    # Act.
+    output = action_fns.send_action_to_simulator(
+        action,
+        simulator,
+        800,
+        600,
+        1,
+    )
+
+    # Assert.
+    self.assertTrue(output)
+    simulator.send_key.assert_called_once_with(42, event_type='keyup')
+
+  def test_send_action_to_simulator_keypress_success(self):
+    """Returns `True` with a proper keypress action."""
+
+    # Arrange.
+    simulator = mock.create_autospec(base_simulator.BaseSimulator)
+    action = {
+        'action_type': action_type_lib.ActionType.KEYPRESS,
+        'keycode': np.array([96], np.int32),
+    }
+
+    # Act.
+    output = action_fns.send_action_to_simulator(
+        action,
+        simulator,
+        800,
+        600,
+        1,
+    )
+
+    # Assert.
+    self.assertTrue(output)
+    simulator.send_key.assert_called_once_with(96, event_type='keypress')
+
+  @parameterized.named_parameters(
+      (
+          'one_finger',
+          1,
+          {
+              'action_type': np.array(action_type_lib.ActionType.LIFT),
+              'touch_position': np.array([0, 0]),
+          },
+      ),
+      (
+          'two_fingers',
+          2,
+          {
+              'action_type': np.array(action_type_lib.ActionType.LIFT),
+              'touch_position': np.array([0, 0]),
+              'action_type_2': np.array(action_type_lib.ActionType.LIFT),
+              'touch_position_2': np.array([0, 0]),
+          },
+      ),
+  )
+  def test_lift_all_fingers_action(
+      self, num_fingers: int, expected_action: dict[str, np.ndarray]
+  ):
+    """Returns the expected action."""
+
+    output = action_fns.lift_all_fingers_action(num_fingers)
+    # Only the expected keys are compared; extra keys in `output` are not
+    # detected by this loop.
+    for k, v in expected_action.items():
+      np.testing.assert_array_equal(v, output[k])
+
+
+# Run the tests through absltest when this file is executed as a script.
+if __name__ == '__main__':
+  absltest.main()