feat: add limo speaker package (#6)

* feat: add limo speaker package * feat: upload sounds * update: subproject commit reference in limo_ros2 * fix: rm python3 sound device * fix: apt update to find the packages just before install * fix: correct launch file ref * fix: venv install of sounddevice, and move code server back to last * fix: install sounddevice after * fix: depend on pip * fix: move install higher up in the container chain * fix: move speaker installation in a separate file and remove ipam config * chore: add train sound * fix: serve code-server with https and a self signed cert * fix: expose /speaker/play and /speaker/tts to the zenoh router * fix: move to speaker pkg to platform repo Doing this to quickly test if it merges and works... preventing issues from building up * fix: move the sounds into the build process, find them dynamically... make the temp file safely * fix: update file path handling for audio files and improve temporary file usage in playAudio and playTTS * chore: launch speaker on container start * fix: add ros user to dockerfile --------- Co-authored-by: Marc Hanheide <[email protected]>
LCAS · Feb 25, 2025 · 726a4c8 · 726a4c8
1 parent c93973f
commit 726a4c8
Show file tree

Hide file tree

Showing 22 changed files with 467 additions and 8 deletions.
diff --git a/.docker/speaker.dockerfile b/.docker/speaker.dockerfile
@@ -0,0 +1,16 @@
+# Install Speaker Utils
+# System packages for the limo_speaker nodes: pip (used below to install the
+# Python audio bindings), PortAudio/ALSA for audio output and espeak, which
+# the text-to-speech node calls to synthesise speech.
+# The apt lists are removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y \
+    python3-pip \
+    portaudio19-dev \
+    alsa-utils \
+    espeak \
+    libespeak-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Add user to audio group
+# (presumably required so the ros user can open the sound devices - TODO confirm)
+RUN usermod -a -G audio ros
+
+# Install Python packages as the ros user
+# `pip3 install --user` installs into the invoking user's home directory, so
+# switch to ros for the install and back to root for the following steps.
+USER ros
+RUN pip3 install --user sounddevice
+USER root
diff --git a/.gitignore b/.gitignore
@@ -165,3 +165,38 @@ cython_debug/
 build/
 log/
 install/
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+
+# End of https://www.toptal.com/developers/gitignore/api/macos
diff --git a/Dockerfile b/Dockerfile
@@ -45,7 +45,7 @@ INCLUDE .docker/ydlidar.dockerfile
 INCLUDE .docker/glog.dockerfile
 INCLUDE .docker/magic_enum.dockerfile
 INCLUDE .docker/uvc.dockerfile
-
+INCLUDE .docker/speaker.dockerfile
 
 # This stage is named 'sourcefilter' and is based on the 'base' image.
 # It performs the following actions:
@@ -129,8 +129,14 @@ RUN . /opt/ros/lcas/install/setup.sh && \
 RUN cd /opt/ros/lcas && colcon build && \
     rm -rf /opt/ros/lcas/src/ /opt/ros/lcas/build/ /opt/ros/lcas/log/
 
+# Install code-server
 RUN curl -fsSL https://code-server.dev/install.sh | sh
 
+# # Install sounddevice in system Python
+# RUN pip3 install sounddevice
+# # Install sounddevice in virtual environment
+# RUN /opt/venv/bin/pip install sounddevice
+
 USER ros
 WORKDIR /home/ros
 ENV SHELL=/bin/bash
diff --git a/configs/docker-compose.yaml b/configs/docker-compose.yaml
@@ -56,7 +56,7 @@ services:
     ipc: host
     #command: bash -c "while true; do sleep 10; done"
     #command: bash -c "zenoh-bridge-ros2dds -r 8080 -l tcp/0.0.0.0:8888"
-    command: bash -c "source /opt/ros/lcas/install/setup.bash; echo $$CYCLONEDDS_URI; (code-server --auth none -an ${ROBOT_NAME} --bind-addr 0.0.0.0:9999 &);(ros2 launch astra_camera dabai.launch.py &); ros2 launch limo_bringup limo_start.launch.py"
+    command: bash -c "source /opt/ros/lcas/install/setup.bash; echo $$CYCLONEDDS_URI; (code-server --auth none -an ${ROBOT_NAME} --bind-addr 0.0.0.0:9999 --cert &);(ros2 launch astra_camera dabai.launch.py &);(ros2 launch limo_speaker limo_speaker.launch.py &); ros2 launch limo_bringup limo_start.launch.py"
     volumes:
       - /dev:/dev
       - ${HOME}:/home/ros/robot_home
@@ -125,9 +125,5 @@ services:
 networks:
   rosnet:
     driver: bridge
-    ipam:
-     driver: default
-     config:
-       - subnet: 172.100.0.0/16
     driver_opts:
       com.docker.network.container_iface_prefix: eth
diff --git a/configs/zenoh.json5 b/configs/zenoh.json5
@@ -92,12 +92,16 @@
             "/joint_states",
             "/ydlidar_ros2_driver_node/transition_event",
             "/initialpose",
-            "/cmd_vel"
+            "/cmd_vel",
+            "/speaker/play",
+            "/speaker/tts",
         ],
         subscribers: [
             "/cmd_vel",
             "/tf",
-            "/tf_static"
+            "/tf_static",
+            "/speaker/play",
+            "/speaker/tts",
         ],
         service_servers: [".*/.*_parameters"],
         service_clients: [],

diff --git a/src/limo_speaker/launch/limo_speaker.launch.py b/src/limo_speaker/launch/limo_speaker.launch.py
@@ -0,0 +1,18 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+
+def generate_launch_description():
+    """Build the launch description that starts both speaker nodes.
+
+    Every listed executable of the ``limo_speaker`` package is started as a
+    node named after the executable, with its output sent to the screen.
+    """
+    speaker_nodes = [
+        Node(
+            package='limo_speaker',
+            executable=executable,
+            name=executable,
+            output='screen',
+        )
+        for executable in ('play_audio', 'play_tts')
+    ]
+    return LaunchDescription(speaker_nodes)
diff --git a/src/limo_speaker/limo_speaker/__init__.py b/src/limo_speaker/limo_speaker/__init__.py
diff --git a/src/limo_speaker/limo_speaker/consoleHorn.py b/src/limo_speaker/limo_speaker/consoleHorn.py
@@ -0,0 +1,53 @@
+import rclpy
+from rclpy.node import Node
+from std_msgs.msg import String
+
+class LimoSpeakerConsoleHorn(Node):
+    """Console front-end that publishes horn sound names on ``/speaker/play``."""
+
+    def __init__(self):
+        super().__init__('limo_speaker_console_horn_publisher')
+        self.publisher_ = self.create_publisher(String, '/speaker/play', 10)
+        # The timer drives the prompt loop; input() blocks until a line is typed.
+        self.timer = self.create_timer(1.0, self.timer_callback)
+
+    def timer_callback(self):
+        """Prompt for a horn choice, publish it and echo the CLI equivalent."""
+        typed = input("> ")
+        # Double quotes become single quotes and any "!," sequence is dropped.
+        user_input = typed.strip().replace('"', "'").replace("!,", "")
+
+        # Empty or single-character answers are menu shortcuts; anything that
+        # is not a known shortcut falls back to the car horn.
+        if len(user_input) <= 1:
+            shortcuts = {"1": "car", "2": "truck", "3": "clown", "4": "train"}
+            user_input = shortcuts.get(user_input, "car")
+
+        self.publisher_.publish(String(data=user_input))
+        # Print the command to teach users how to do it manually
+        print(f"ros2 topic pub -1 /speaker/play std_msgs/msg/String \"data: '{user_input}'\"\n")
+
+def main(args=None):
+    """Run the console horn publisher until the user stops it.
+
+    Args:
+        args: Command-line arguments forwarded to ``rclpy.init``.
+    """
+    rclpy.init(args=args)
+    horn_pub = LimoSpeakerConsoleHorn()
+    try:
+        rclpy.spin(horn_pub)
+    except (KeyboardInterrupt, EOFError):
+        # Ctrl-C, or a closed stdin while waiting in input(), is the normal
+        # way to leave the prompt loop; exit without a traceback.
+        pass
+    finally:
+        # Always release the node, even when spin() was interrupted.
+        horn_pub.destroy_node()
+        # The context may already have been shut down by the signal handler;
+        # shutting it down twice raises, so only do it when still active.
+        if rclpy.ok():
+            rclpy.shutdown()
+
+
+if __name__ == '__main__':
+    # Show the horn menu once before the node starts prompting for input.
+    divider = "*" * 16
+    print("\033[1mLimo Horn Publisher\n\033[0mWhich horn would you like to honk?")
+    print(divider)
+    print(" 1) Car Horn\n 2) Truck Horn \n 3) Clown Horn \n 4) Train Horn")
+    print(divider, "\n")
+    main()
diff --git a/src/limo_speaker/limo_speaker/consoleTTS.py b/src/limo_speaker/limo_speaker/consoleTTS.py
@@ -0,0 +1,33 @@
+import rclpy
+from rclpy.node import Node
+from std_msgs.msg import String
+
+class LimoSpeakerConsoleTTS(Node):
+    """Console front-end that publishes typed text on ``/speaker/tts``."""
+
+    def __init__(self):
+        super().__init__('limo_speaker_console_tts_publisher')
+        self.publisher_ = self.create_publisher(String, '/speaker/tts', 10)
+        # The timer drives the prompt loop; input() blocks until a line is typed.
+        self.timer = self.create_timer(1.0, self.timer_callback)
+
+    def timer_callback(self):
+        """Read one line from the console, publish it and echo the CLI equivalent."""
+        typed = input("> ")
+        # Double quotes become single quotes and any "!," sequence is dropped.
+        user_input = typed.strip().replace('"', "'").replace("!,", "")
+
+        self.publisher_.publish(String(data=user_input))
+        # Print the command to teach users how to do it manually
+        print(f"ros2 topic pub -1 /speaker/tts std_msgs/msg/String \"data: '{user_input}'\"\n")
+
+def main(args=None):
+    """Run the console text-to-speech publisher until the user stops it.
+
+    Args:
+        args: Command-line arguments forwarded to ``rclpy.init``.
+    """
+    rclpy.init(args=args)
+    tts_pub = LimoSpeakerConsoleTTS()
+    try:
+        rclpy.spin(tts_pub)
+    except (KeyboardInterrupt, EOFError):
+        # Ctrl-C, or a closed stdin while waiting in input(), is the normal
+        # way to leave the prompt loop; exit without a traceback.
+        pass
+    finally:
+        # Always release the node, even when spin() was interrupted.
+        tts_pub.destroy_node()
+        # The context may already have been shut down by the signal handler;
+        # shutting it down twice raises, so only do it when still active.
+        if rclpy.ok():
+            rclpy.shutdown()
+
+
+if __name__ == '__main__':
+    # Printed once when the file is run directly, before the node starts
+    # reading lines from the console.
+    print("What do you want me to say?\n\n")
+    main()
diff --git a/src/limo_speaker/limo_speaker/playAudio.py b/src/limo_speaker/limo_speaker/playAudio.py
@@ -0,0 +1,76 @@
+import rclpy
+from rclpy.node import Node
+import sounddevice as sd
+import numpy as np
+import wave
+from std_msgs.msg import String
+from ament_index_python.packages import get_package_share_directory
+import os
+
+class LimoSpeakerPlayAudio(Node):
+    """Plays packaged WAV sounds named by messages on ``/speaker/play``."""
+
+    # NumPy dtype for each PCM sample width (in bytes) that can be decoded
+    # directly; 8-bit WAV samples are unsigned, wider ones are signed.
+    _PCM_DTYPES = {1: np.uint8, 2: np.int16, 4: np.int32}
+
+    def __init__(self):
+        super().__init__('limo_speaker_play_audio')
+        self.subscription = self.create_subscription(
+            String,
+            '/speaker/play',
+            self.listener_callback,
+            10)
+
+    def listener_callback(self, msg):
+        """Play the sound whose name is carried in ``msg.data``."""
+        self.get_logger().info('Trying to play sound: "%s"' % msg.data)
+        self.play_wav(msg.data)
+
+    def play_wav(self, sound, device_name="USB PnP Audio Device"):
+        """Play ``<sound>.wav`` from the package's ``sounds`` directory.
+
+        Failures (no matching device, unreadable file, unsupported sample
+        format, playback error) are logged as warnings instead of raised, so
+        a bad request cannot take the node down.
+
+        Args:
+            sound: File name of the sound, without the ``.wav`` extension.
+            device_name: Text that the output device's name must contain
+                (case-insensitive).
+        """
+        device_index = self.get_speaker_by_name(device_name)
+        if device_index is None:
+            self.get_logger().warning(f"No sound device found with name containing '{device_name}'")
+            return
+
+        # Take the sounds that have been installed with the package
+        file_path = os.path.join(get_package_share_directory('limo_speaker'), 'sounds', f"{sound}.wav")
+
+        # Only catch what reading a WAV file can raise, so that programming
+        # errors and interrupts are not hidden behind "does the file exist?".
+        try:
+            with wave.open(file_path, 'rb') as wf:
+                samplerate = wf.getframerate()
+                channels = wf.getnchannels()
+                sample_width = wf.getsampwidth()
+                frames = wf.readframes(wf.getnframes())
+        except (OSError, EOFError, wave.Error) as e:
+            self.get_logger().warning(f"Couldn't play file {file_path}, does the file exist? Error: {e}")
+            return
+
+        dtype = self._PCM_DTYPES.get(sample_width)
+        if dtype is None:
+            self.get_logger().warning(
+                f"Couldn't play file {file_path}, unsupported sample width of {sample_width} bytes")
+            return
+
+        try:
+            # Convert bytes to NumPy array, one column per channel if not mono
+            data = np.frombuffer(frames, dtype=dtype)
+            if channels > 1:
+                data = data.reshape(-1, channels)
+
+            self.get_logger().info(f"Playing {sound} sound on device {device_index} - {device_name}")
+
+            sd.play(data, samplerate=samplerate, device=device_index)
+            sd.wait()
+        except Exception as e:
+            # Callback boundary: report playback problems and keep the node alive.
+            self.get_logger().warning(f"Couldn't play file {file_path}. Error: {e}")
+
+    def get_speaker_by_name(self, name_contains):
+        """Return the index of the first audio device whose name contains the text.
+
+        The comparison is case-insensitive.
+
+        Args:
+            name_contains: Text to look for in the device names.
+
+        Returns:
+            The device index, or ``None`` when no device matches.
+        """
+        wanted = name_contains.lower()
+        for idx, device in enumerate(sd.query_devices()):
+            if wanted in device['name'].lower():
+                return idx
+        return None
+
+def main(args=None):
+    """Run the audio playback node until it is interrupted.
+
+    Args:
+        args: Command-line arguments forwarded to ``rclpy.init``.
+    """
+    rclpy.init(args=args)
+
+    playAudio = LimoSpeakerPlayAudio()
+
+    try:
+        rclpy.spin(playAudio)
+    except KeyboardInterrupt:
+        # Ctrl-C is the normal way to stop the node; exit without a traceback.
+        pass
+    finally:
+        # Always release the node, even when spin() was interrupted.
+        playAudio.destroy_node()
+        # The context may already have been shut down by the signal handler;
+        # shutting it down twice raises, so only do it when still active.
+        if rclpy.ok():
+            rclpy.shutdown()
+
+
+# Start the playback node when this file is executed directly as a script.
+if __name__ == '__main__':
+    main()
diff --git a/src/limo_speaker/limo_speaker/playTTS.py b/src/limo_speaker/limo_speaker/playTTS.py
@@ -0,0 +1,85 @@
+import rclpy
+from rclpy.node import Node
+import sounddevice as sd
+import numpy as np
+import wave
+import os
+from std_msgs.msg import String
+from time import sleep
+from scipy.io import wavfile
+from scipy.signal import resample
+import tempfile
+
+class LimoSpeakerPlayTTS(Node):
+    """Speaks the text received on ``/speaker/tts`` through espeak."""
+
+    def __init__(self):
+        super().__init__('limo_speaker_play_tts')
+        self.subscription = self.create_subscription(
+            String,
+            '/speaker/tts',
+            self.listener_callback,
+            10)
+
+    def listener_callback(self, msg):
+        """Speak the text carried in ``msg.data``."""
+        self.get_logger().info('I am saying: "%s"' % msg.data)
+        self.play_speech(msg.data)
+
+    def play_speech(self, text, device_name="USB PnP Audio Device"):
+        """Synthesise ``text`` with espeak and play it on the named device.
+
+        Failures (no matching device, espeak error, unreadable output,
+        playback error) are logged as warnings instead of raised.
+
+        Args:
+            text: Text to speak. It arrives from a ROS topic, so it is treated
+                as untrusted and never passed through a shell.
+            device_name: Text that the output device's name must contain
+                (case-insensitive).
+        """
+        # Local import keeps this method self-contained; the module's import
+        # block does not provide subprocess.
+        import subprocess
+
+        device_index = self.get_speaker_by_name(device_name)
+        if device_index is None:
+            self.get_logger().warning(f"No sound device found with name containing '{device_name}'")
+            return
+
+        # Create a temporary file to store the speech; tempfile picks a safe,
+        # unique name (e.g. tts_a1b2c3.wav) and the context manager removes
+        # the file as soon as playback is done.
+        with tempfile.NamedTemporaryFile(prefix="tts_", suffix=".wav") as temp:
+            try:
+                # Argument list instead of a shell string: quotes, backticks or
+                # "$(...)" in the text cannot run commands. "--" stops option
+                # parsing so text starting with "-" is still spoken as text.
+                subprocess.run(['espeak', '-w', temp.name, '--', text], check=True)
+
+                with wave.open(temp.name, 'rb') as wf:
+                    samplerate = wf.getframerate()
+                    frames = wf.readframes(wf.getnframes())
+
+                # Convert bytes to NumPy array
+                data = np.frombuffer(frames, dtype=np.int16)
+
+                self.get_logger().info(f"Playing tts sound file on device {device_index} - {device_name}")
+
+                # Use the device's default sample rate as the playback rate
+                device_info = sd.query_devices(device_index, 'output')
+                device_samplerate = device_info['default_samplerate']
+
+                # Resample if the file's rate differs from the device's rate
+                if samplerate != device_samplerate:
+                    self.get_logger().info(f"Resampling from {samplerate} to {device_samplerate}")
+                    resampled = resample(data, int(len(data) * device_samplerate / samplerate))
+                    # Resampling yields floats that can overshoot the 16-bit
+                    # range; round and clip so the cast cannot wrap around.
+                    limits = np.iinfo(np.int16)
+                    data = np.clip(np.rint(resampled), limits.min, limits.max).astype(np.int16)
+                    samplerate = device_samplerate
+
+                sd.play(data, samplerate=samplerate, device=device_index)
+                sd.wait()
+            except Exception as e:
+                # Callback boundary: report the problem and keep the node alive.
+                self.get_logger().warning(f"Couldn't play file {temp.name}, does the file exist? Error: {e}")
+
+    def get_speaker_by_name(self, name_contains):
+        """Return the index of the first audio device whose name contains the text.
+
+        The comparison is case-insensitive.
+
+        Args:
+            name_contains: Text to look for in the device names.
+
+        Returns:
+            The device index, or ``None`` when no device matches.
+        """
+        wanted = name_contains.lower()
+        for idx, device in enumerate(sd.query_devices()):
+            if wanted in device['name'].lower():
+                return idx
+        return None
+
+def main(args=None):
+    """Run the text-to-speech node until it is interrupted.
+
+    Args:
+        args: Command-line arguments forwarded to ``rclpy.init``.
+    """
+    rclpy.init(args=args)
+    playTTS = LimoSpeakerPlayTTS()
+    try:
+        rclpy.spin(playTTS)
+    except KeyboardInterrupt:
+        # Ctrl-C is the normal way to stop the node; exit without a traceback.
+        pass
+    finally:
+        # Always release the node, even when spin() was interrupted.
+        playTTS.destroy_node()
+        # The context may already have been shut down by the signal handler;
+        # shutting it down twice raises, so only do it when still active.
+        if rclpy.ok():
+            rclpy.shutdown()
+
+# Start the text-to-speech node when this file is executed directly as a script.
+if __name__ == '__main__':
+    main()