Skip to content

Commit e568069

Browse files
Implement LocalAudioTrack, AudioSource, audio tracks, Local/RemoteTrackPublication (#8)
* initial commit * some more polish * fix the build, and order of CMakeList sections * removed the un-implemented functions and added a comment to SendRequest() * added back getTrackStatsAsync which was removed by mistake * implemented the LocalAudioTracks, participants, audio sources, and track_publications * fix the linux build and the license header * remove the debug logs
1 parent 31e9dbe commit e568069

33 files changed

+2851
-101
lines changed

CMakeLists.txt

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,22 +141,52 @@ add_custom_target(build_rust_ffi ALL
141141
VERBATIM
142142
)
143143

144+
# A workaround to strip out the protozero_plugin.o symbols which cause our examples fail to link on Linux.
145+
# Make sure CMAKE_AR is defined; if not, you can hardcode "ar"
146+
if(NOT CMAKE_AR)
147+
find_program(CMAKE_AR ar REQUIRED)
148+
endif()
149+
150+
add_custom_command(
151+
TARGET build_rust_ffi
152+
POST_BUILD
153+
COMMAND ${CMAKE_AR} -dv $<TARGET_FILE:livekit_ffi> protozero_plugin.o
154+
COMMENT "Removing protozero_plugin.o (stray main) from liblivekit_ffi.a"
155+
)
156+
144157
# ---- C++ wrapper library ----
145158
add_library(livekit
159+
include/livekit/audio_frame.h
160+
include/livekit/audio_source.h
146161
include/livekit/room.h
162+
include/livekit/room_delegate.h
147163
include/livekit/ffi_handle.h
148164
include/livekit/ffi_client.h
165+
include/livekit/local_audio_track.h
149166
include/livekit/participant.h
167+
include/livekit/local_participant.h
150168
include/livekit/livekit.h
151169
include/livekit/stats.h
152170
include/livekit/track.h
171+
include/livekit/track_publication.h
172+
include/livekit/local_track_publication.h
173+
include/livekit/remote_track_publication.h
174+
src/audio_frame.cpp
175+
src/audio_source.cpp
153176
src/ffi_handle.cpp
154177
src/ffi_client.cpp
178+
src/local_audio_track.cpp
155179
src/room.cpp
156-
src/room_event_converter.cpp
157-
src/room_event_converter.h
180+
src/room_proto_converter.cpp
181+
src/room_proto_converter.h
182+
src/local_participant.cpp
158183
src/stats.cpp
159184
src/track.cpp
185+
src/track_proto_converter.cpp
186+
src/track_proto_converter.h
187+
src/track_publication.cpp
188+
src/local_track_publication.cpp
189+
src/remote_track_publication.cpp
160190
)
161191

162192
# Add generated proto objects to the wrapper

examples/simple_room/main.cpp

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <csignal>
44
#include <cstdlib>
55
#include <iostream>
6+
#include <random>
67
#include <string>
78
#include <thread>
89
#include <vector>
@@ -28,7 +29,7 @@ void print_usage(const char *prog) {
2829
<< " LIVEKIT_URL, LIVEKIT_TOKEN\n";
2930
}
3031

31-
void handle_sigint(int) { g_running = false; }
32+
void handle_sigint(int) { g_running.store(false); }
3233

3334
bool parse_args(int argc, char *argv[], std::string &url, std::string &token) {
3435
// 1) --help
@@ -118,6 +119,47 @@ class SimpleRoomDelegate : public livekit::RoomDelegate {
118119
}
119120
};
120121

122+
// Test utils to run a capture loop to publish noisy audio frames to the room
123+
void runNoiseCaptureLoop(const std::shared_ptr<AudioSource> &source) {
124+
const int sample_rate = source->sample_rate();
125+
const int num_channels = source->num_channels();
126+
const int frame_ms = 10;
127+
const int samples_per_channel = sample_rate * frame_ms / 1000;
128+
129+
std::mt19937 rng(std::random_device{}());
130+
std::uniform_int_distribution<int16_t> noise_dist(-5000, 5000);
131+
using Clock = std::chrono::steady_clock;
132+
auto next_deadline = Clock::now();
133+
while (g_running.load(std::memory_order_relaxed)) {
134+
AudioFrame frame =
135+
AudioFrame::create(sample_rate, num_channels, samples_per_channel);
136+
const std::size_t total_samples =
137+
static_cast<std::size_t>(num_channels) *
138+
static_cast<std::size_t>(samples_per_channel);
139+
for (std::size_t i = 0; i < total_samples; ++i) {
140+
frame.data()[i] = noise_dist(rng);
141+
}
142+
try {
143+
source->captureFrame(frame);
144+
} catch (const std::exception &e) {
145+
// If something goes wrong, log and break out
146+
std::cerr << "Error in captureFrame: " << e.what() << std::endl;
147+
break;
148+
}
149+
150+
// Pace the loop to roughly real-time
151+
next_deadline += std::chrono::milliseconds(frame_ms);
152+
std::this_thread::sleep_until(next_deadline);
153+
}
154+
155+
// Optionally clear queued audio on exit
156+
try {
157+
source->clearQueue();
158+
} catch (...) {
159+
// ignore errors on shutdown
160+
std::cout << "Error in clearQueue" << std::endl;
161+
}
162+
}
121163
} // namespace
122164

123165
int main(int argc, char *argv[]) {
@@ -168,14 +210,46 @@ int main(int argc, char *argv[]) {
168210
<< info.reliable_dc_buffered_amount_low_threshold << "\n"
169211
<< " Creation time (ms): " << info.creation_time << "\n";
170212

171-
// TOD(shijing), implement local and remoteParticipants in the room
213+
auto audioSource = std::make_shared<AudioSource>(44100, 1, 10);
214+
auto audioTrack =
215+
LocalAudioTrack::createLocalAudioTrack("micTrack", audioSource);
216+
217+
TrackPublishOptions opts;
218+
opts.source = TrackSource::SOURCE_MICROPHONE;
219+
opts.dtx = false;
220+
opts.simulcast = false;
172221

222+
try {
223+
// publishTrack takes std::shared_ptr<Track>, LocalAudioTrack derives from
224+
// Track
225+
auto pub = room.local_participant()->publishTrack(audioTrack, opts);
226+
227+
std::cout << "Published track:\n"
228+
<< " SID: " << pub->sid() << "\n"
229+
<< " Name: " << pub->name() << "\n"
230+
<< " Kind: " << static_cast<int>(pub->kind()) << "\n"
231+
<< " Source: " << static_cast<int>(pub->source()) << "\n"
232+
<< " Simulcasted: " << std::boolalpha << pub->simulcasted()
233+
<< "\n"
234+
<< " Muted: " << std::boolalpha << pub->muted() << "\n";
235+
} catch (const std::exception &e) {
236+
std::cerr << "Failed to publish track: " << e.what() << std::endl;
237+
}
238+
239+
// TODO, if we have pre-buffering feature, we might consider starting the
240+
// thread right after creating the source.
241+
std::thread audioThread(runNoiseCaptureLoop, audioSource);
173242
// Keep the app alive until Ctrl-C so we continue receiving events,
174243
// similar to asyncio.run(main()) keeping the loop running.
175244
while (g_running.load()) {
176245
std::this_thread::sleep_for(std::chrono::milliseconds(100));
177246
}
178247

248+
// Shutdown the audio thread.
249+
if (audioThread.joinable()) {
250+
audioThread.join();
251+
}
252+
179253
FfiClient::instance().shutdown();
180254
std::cout << "Exiting.\n";
181255
return 0;

include/livekit/audio_frame.h

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright 2025 LiveKit
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an “AS IS” BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <cstdint>
20+
#include <string>
21+
#include <vector>
22+
23+
namespace livekit {
24+
25+
namespace proto {
26+
class AudioFrameBufferInfo;
27+
class OwnedAudioFrameBuffer;
28+
} // namespace proto
29+
30+
class AudioFrame {
31+
public:
32+
/**
33+
* Construct an AudioFrame from raw PCM samples.
34+
*
35+
* @param data Interleaved PCM samples (int16).
36+
* @param sample_rate Sample rate (Hz).
37+
* @param num_channels Number of channels.
38+
* @param samples_per_channel Number of samples per channel.
39+
*
40+
* Throws std::invalid_argument if the data size is inconsistent with
41+
* num_channels * samples_per_channel.
42+
*/
43+
AudioFrame(std::vector<std::int16_t> data, int sample_rate, int num_channels,
44+
int samples_per_channel);
45+
46+
/**
47+
* Create a new zero-initialized AudioFrame instance.
48+
*/
49+
static AudioFrame create(int sample_rate, int num_channels,
50+
int samples_per_channel);
51+
52+
/**
53+
* Construct an AudioFrame by copying data out of an OwnedAudioFrameBuffer.
54+
*/
55+
static AudioFrame fromOwnedInfo(const proto::OwnedAudioFrameBuffer &owned);
56+
57+
/**
58+
* Build a proto AudioFrameBufferInfo pointing at this frame’s data.
59+
*
60+
* The underlying buffer must stay alive as long as the native side
61+
* uses the pointer.
62+
*
63+
*/
64+
proto::AudioFrameBufferInfo toProto() const;
65+
66+
// ---- Accessors ----
67+
68+
const std::vector<std::int16_t> &data() const noexcept { return data_; }
69+
std::vector<std::int16_t> &data() noexcept { return data_; }
70+
71+
/// Number of samples in the buffer (per all channels).
72+
std::size_t total_samples() const noexcept { return data_.size(); }
73+
74+
/// Sample rate in Hz.
75+
int sample_rate() const noexcept { return sample_rate_; }
76+
77+
/// Number of channels.
78+
int num_channels() const noexcept { return num_channels_; }
79+
80+
/// Samples per channel.
81+
int samples_per_channel() const noexcept { return samples_per_channel_; }
82+
83+
/// Duration in seconds (samples_per_channel / sample_rate).
84+
double duration() const noexcept;
85+
86+
/// A human-readable description (like Python __repr__).
87+
std::string to_string() const;
88+
89+
private:
90+
std::vector<std::int16_t> data_;
91+
int sample_rate_;
92+
int num_channels_;
93+
int samples_per_channel_;
94+
};
95+
96+
} // namespace livekit

include/livekit/audio_source.h

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
* Copyright 2025 LiveKit
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an “AS IS” BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <cstdint>
20+
21+
#include "livekit/audio_frame.h"
22+
#include "livekit/ffi_handle.h"
23+
24+
namespace livekit {
25+
26+
namespace proto {
27+
class FfiRequest;
28+
class FfiResponse;
29+
} // namespace proto
30+
31+
class FfiClient;
32+
33+
/**
34+
* Represents a real-time audio source with an internal audio queue.
35+
*/
36+
class AudioSource {
37+
public:
38+
/**
39+
* Create a new native audio source.
40+
*
41+
* @param sample_rate Sample rate in Hz.
42+
* @param num_channels Number of channels.
43+
* @param queue_size_ms Max buffer duration for the internal queue in ms.
44+
*/
45+
AudioSource(int sample_rate, int num_channels, int queue_size_ms = 1000);
46+
47+
~AudioSource();
48+
49+
AudioSource(const AudioSource &) = delete;
50+
AudioSource &operator=(const AudioSource &) = delete;
51+
AudioSource(AudioSource &&) noexcept = default;
52+
AudioSource &operator=(AudioSource &&) noexcept = default;
53+
54+
/// The sample rate of the audio source in Hz.
55+
int sample_rate() const noexcept { return sample_rate_; }
56+
57+
/// The number of audio channels.
58+
int num_channels() const noexcept { return num_channels_; }
59+
60+
/// Underlying FFI handle ID used in FFI requests.
61+
std::uint64_t ffi_handle_id() const noexcept {
62+
return static_cast<std::uint64_t>(handle_.get());
63+
}
64+
65+
/// Current duration of queued audio (in seconds).
66+
double queuedDuration() const noexcept;
67+
68+
/**
69+
* Clears the internal audio queue on the native side and resets local
70+
* queue tracking.
71+
*/
72+
void clearQueue();
73+
74+
/**
75+
* Push an AudioFrame into the audio source and BLOCK until the FFI callback
76+
* confirms that the native side has finished processing (consuming) the
77+
* frame. Safe usage: The frame's internal buffer must remain valid only until
78+
* this function returns. Because this call blocks until the corresponding FFI
79+
* callback arrives, the caller may safely destroy or reuse the frame
80+
* afterward.
81+
* @param frame The audio frame to send. No-op if the frame contains
82+
* zero samples.
83+
* @param timeout_ms Maximum time to wait for the FFI callback.
84+
* - If timeout_ms > 0: block up to this duration.
85+
* A timeout will cause std::runtime_error.
86+
* - If timeout_ms == 0: wait indefinitely until the
87+
* callback arrives (recommended for production unless the caller needs
88+
* explicit timeout control).
89+
*
90+
* Notes:
91+
* - This is a blocking call.
92+
* - timeout_ms == 0 (infinite wait) is the safest mode because it
93+
* guarantees the callback completes before the function returns, which in
94+
* turn guarantees that the audio buffer lifetime is fully protected. The
95+
* caller does not need to manage or extend the frame lifetime manually.
96+
*
97+
* - May throw std::runtime_error if:
98+
* • the FFI reports an error
99+
*
100+
* - The underlying FFI request *must* eventually produce a callback for
101+
* each frame. If the FFI layer is misbehaving or the event loop is stalled,
102+
* a timeout may occur in bounded-wait mode.
103+
*/
104+
void captureFrame(const AudioFrame &frame, int timeout_ms = 20);
105+
106+
/**
107+
* Block until the currently queued audio has (roughly) played out.
108+
*/
109+
void waitForPlayout() const;
110+
111+
private:
112+
// Internal helper to reset the local queue tracking (like _release_waiter).
113+
void resetQueueTracking() noexcept;
114+
115+
int sample_rate_;
116+
int num_channels_;
117+
int queue_size_ms_;
118+
119+
// RAII wrapper for this audio source's FFI handle
120+
FfiHandle handle_;
121+
122+
// Queue tracking (all in seconds; based on steady_clock in the .cpp).
123+
mutable double last_capture_{0.0};
124+
mutable double q_size_{0.0};
125+
};
126+
127+
} // namespace livekit

0 commit comments

Comments
 (0)