Merge pull request #852 from ftnext/followup-additional-type-inference-840

ftnext · web-flow · commit 1b3ccffca5b7 · 2025-09-15T01:16:51.000+09:00
docs: add additional type hints (follow up #840)
diff --git a/reference/library-reference.rst b/reference/library-reference.rst
@@ -163,26 +163,12 @@ If ``duration`` is not specified, then it will record until there is no more aud
 ``recognizer_instance.adjust_for_ambient_noise(source: AudioSource, duration: float = 1) -> None``
 --------------------------------------------------------------------------------------------------
 
-Adjusts the energy threshold dynamically using audio from ``source`` (an ``AudioSource`` instance) to account for ambient noise.
+.. autofunction:: speech_recognition.Recognizer.adjust_for_ambient_noise
 
-Intended to calibrate the energy threshold with the ambient energy level. Should be used on periods of audio without speech - will stop early if any speech is detected.
+``recognizer_instance.listen(source: AudioSource, timeout: Union[float, None] = None, phrase_time_limit: Union[float, None] = None, snowboy_configuration: Union[Tuple[str, Iterable[str]], None] = None, stream: bool = False) -> AudioData``
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 
-The ``duration`` parameter is the maximum number of seconds that it will dynamically adjust the threshold for before returning. This value should be at least 0.5 in order to get a representative sample of the ambient noise.
-
-``recognizer_instance.listen(source: AudioSource, timeout: Union[float, None] = None, phrase_time_limit: Union[float, None] = None, snowboy_configuration: Union[Tuple[str, Iterable[str]], None] = None) -> AudioData``
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-Records a single phrase from ``source`` (an ``AudioSource`` instance) into an ``AudioData`` instance, which it returns.
-
-This is done by waiting until the audio has an energy above ``recognizer_instance.energy_threshold`` (the user has started speaking), and then recording until it encounters ``recognizer_instance.pause_threshold`` seconds of non-speaking or there is no more audio input. The ending silence is not included.
-
-The ``timeout`` parameter is the maximum number of seconds that this will wait for a phrase to start before giving up and raising a ``speech_recognition.WaitTimeoutError`` exception. If ``timeout`` is ``None``, there will be no wait timeout.
-
-The ``phrase_time_limit`` parameter is the maximum number of seconds that this will allow a phrase to continue before stopping and returning the part of the phrase processed before the time limit was reached. The resulting audio will be the phrase cut off at the time limit. If ``phrase_time_limit`` is ``None``, there will be no phrase time limit.
-
-The ``snowboy_configuration`` parameter allows integration with `Snowboy <https://snowboy.kitt.ai/>`__, an offline, high-accuracy, power-efficient hotword recognition engine. When used, this function will pause until Snowboy detects a hotword, after which it will unpause. This parameter should either be ``None`` to turn off Snowboy support, or a tuple of the form ``(SNOWBOY_LOCATION, LIST_OF_HOT_WORD_FILES)``, where ``SNOWBOY_LOCATION`` is the path to the Snowboy root directory, and ``LIST_OF_HOT_WORD_FILES`` is a list of paths to Snowboy hotword configuration files (``*.pmdl`` or ``*.umdl`` format).
-
-This operation will always complete within ``timeout + phrase_time_limit`` seconds if both are numbers, either by returning the audio data, or by raising a ``speech_recognition.WaitTimeoutError`` exception.
+.. autofunction:: speech_recognition.Recognizer.listen
 
 ``recognizer_instance.listen_in_background(source: AudioSource, callback: Callable[[Recognizer, AudioData], Any]) -> Callable[[bool], None]``
 ---------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
@@ -21,6 +21,7 @@
 import time
 import uuid
 import wave
+from collections.abc import Iterable
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -363,7 +364,7 @@ def record(self, source, duration=None, offset=None):
         frames.close()
         return AudioData(frame_data, source.SAMPLE_RATE, source.SAMPLE_WIDTH)
 
-    def adjust_for_ambient_noise(self, source: AudioSource, duration=1):
+    def adjust_for_ambient_noise(self, source: AudioSource, duration: float = 1) -> None:
         """
         Adjusts the energy threshold dynamically using audio from ``source`` (an ``AudioSource`` instance) to account for ambient noise.
 
@@ -439,7 +440,7 @@ def snowboy_wait_for_hot_word(self, snowboy_location, snowboy_hot_word_files, so
 
         return b"".join(frames), elapsed_time
 
-    def listen(self, source: AudioSource, timeout=None, phrase_time_limit=None, snowboy_configuration=None, stream=False):
+    def listen(self, source: AudioSource, timeout: float | None = None, phrase_time_limit: float | None = None, snowboy_configuration: tuple[str, Iterable[str]] | None = None, stream: bool = False) -> AudioData:
         """
         Records a single phrase from ``source`` (an ``AudioSource`` instance) into an ``AudioData`` instance, which it returns.