From 23ffd37dded3bf872e42d7a00727ab3c4d105a97 Mon Sep 17 00:00:00 2001 From: Marvin W Date: Sat, 1 May 2021 15:19:05 +0200 Subject: Echo Cancellation --- plugins/rtp/CMakeLists.txt | 20 +++- plugins/rtp/src/device.vala | 30 +++-- plugins/rtp/src/plugin.vala | 5 +- plugins/rtp/src/voice_processor.vala | 176 +++++++++++++++++++++++++++++ plugins/rtp/src/voice_processor_native.cpp | 141 +++++++++++++++++++++++ 5 files changed, 356 insertions(+), 16 deletions(-) create mode 100644 plugins/rtp/src/voice_processor.vala create mode 100644 plugins/rtp/src/voice_processor_native.cpp (limited to 'plugins') diff --git a/plugins/rtp/CMakeLists.txt b/plugins/rtp/CMakeLists.txt index 92ec1b97..b19c8a8f 100644 --- a/plugins/rtp/CMakeLists.txt +++ b/plugins/rtp/CMakeLists.txt @@ -1,4 +1,5 @@ find_package(GstRtp REQUIRED) +find_package(WebRTCAudioProcessing 0.2) find_packages(RTP_PACKAGES REQUIRED Gee GLib @@ -8,12 +9,26 @@ find_packages(RTP_PACKAGES REQUIRED GTK3 Gst GstApp + GstAudio ) if(Gst_VERSION VERSION_GREATER "1.16") set(RTP_DEFINITIONS GST_1_16) endif() +if(WebRTCAudioProcessing_VERSION GREATER "0.4") + message(WARNING "Ignoring WebRTCAudioProcessing, only versions < 0.4 supported so far") + unset(WebRTCAudioProcessing_FOUND) +endif() + +if(WebRTCAudioProcessing_FOUND) + set(RTP_DEFINITIONS ${RTP_DEFINITIONS} WITH_VOICE_PROCESSOR) + set(RTP_VOICE_PROCESSOR_VALA src/voice_processor.vala) + set(RTP_VOICE_PROCESSOR_CXX src/voice_processor_native.cpp) +else() + message(WARNING "WebRTCAudioProcessing not found, build without voice pre-processing!") +endif() + vala_precompile(RTP_VALA_C SOURCES src/codec_util.vala @@ -23,6 +38,7 @@ SOURCES src/stream.vala src/video_widget.vala src/register_plugin.vala + ${RTP_VOICE_PROCESSOR_VALA} CUSTOM_VAPIS ${CMAKE_BINARY_DIR}/exports/crypto-vala.vapi ${CMAKE_BINARY_DIR}/exports/xmpp-vala.vapi @@ -36,8 +52,8 @@ DEFINITIONS ) add_definitions(${VALA_CFLAGS} -DG_LOG_DOMAIN="rtp" -I${CMAKE_CURRENT_SOURCE_DIR}/src) -add_library(rtp SHARED ${RTP_VALA_C}) -target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0) +add_library(rtp SHARED ${RTP_VALA_C} ${RTP_VOICE_PROCESSOR_CXX}) +target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0 webrtc-audio-processing) set_target_properties(rtp PROPERTIES PREFIX "") set_target_properties(rtp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/plugins/) diff --git a/plugins/rtp/src/device.vala b/plugins/rtp/src/device.vala index 785f853a..f8894502 100644 --- a/plugins/rtp/src/device.vala +++ b/plugins/rtp/src/device.vala @@ -37,6 +37,7 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object { private Gst.Element dsp; private Gst.Element mixer; private Gst.Element filter; + private Gst.Element rate; private int links = 0; public Device(Plugin plugin, Gst.Device device) { @@ -132,12 +133,10 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object { pipe.add(filter); element.link(filter); if (media == "audio" && plugin.echoprobe != null) { - dsp = Gst.ElementFactory.make("webrtcdsp", @"dsp_$id"); - if (dsp != null) { - dsp.@set("probe", plugin.echoprobe.name); - pipe.add(dsp); - filter.link(dsp); - } + dsp = new VoiceProcessor(plugin.echoprobe, element as Gst.Audio.StreamVolume); + dsp.name = @"dsp_$id"; + pipe.add(dsp); + filter.link(dsp); } tee = Gst.ElementFactory.make("tee", @"tee_$id"); tee.@set("allow-not-linked", true); @@ -153,7 +152,11 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object { filter.@set("caps", get_best_caps()); pipe.add(filter); if (plugin.echoprobe != null) { - filter.link(plugin.echoprobe); + rate = Gst.ElementFactory.make("audiorate", @"rate_$id"); + rate.@set("tolerance", 100000000); + pipe.add(rate); + filter.link(rate); + rate.link(plugin.echoprobe); plugin.echoprobe.link(element); } else { filter.link(element); @@ -184,14 +187,17 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object { if (filter != null) { filter.set_locked_state(true); filter.set_state(Gst.State.NULL); - if (plugin.echoprobe != null) { - filter.unlink(plugin.echoprobe); - } else { - filter.unlink(element); - } + filter.unlink(rate ?? ((Gst.Element)plugin.echoprobe) ?? element); pipe.remove(filter); filter = null; } + if (rate != null) { + rate.set_locked_state(true); + rate.set_state(Gst.State.NULL); + rate.unlink(plugin.echoprobe); + pipe.remove(rate); + rate = null; + } if (plugin.echoprobe != null) { plugin.echoprobe.unlink(element); } diff --git a/plugins/rtp/src/plugin.vala b/plugins/rtp/src/plugin.vala index d43588b4..e3d5ee41 100644 --- a/plugins/rtp/src/plugin.vala +++ b/plugins/rtp/src/plugin.vala @@ -8,7 +8,7 @@ public class Dino.Plugins.Rtp.Plugin : RootInterface, VideoCallPlugin, Object { public Gst.DeviceMonitor device_monitor { get; private set; } public Gst.Pipeline pipe { get; private set; } public Gst.Bin rtpbin { get; private set; } - public Gst.Element echoprobe { get; private set; } + public EchoProbe echoprobe { get; private set; } private Gee.List streams = new ArrayList(); private Gee.List devices = new ArrayList(); @@ -72,7 +72,8 @@ public class Dino.Plugins.Rtp.Plugin : RootInterface, VideoCallPlugin, Object { pipe.add(rtpbin); // Audio echo probe - echoprobe = Gst.ElementFactory.make("webrtcechoprobe", "echo-probe"); +// echoprobe = Gst.ElementFactory.make("webrtcechoprobe", "echo-probe"); + echoprobe = new EchoProbe(); if (echoprobe != null) pipe.add(echoprobe); // Pipeline diff --git a/plugins/rtp/src/voice_processor.vala b/plugins/rtp/src/voice_processor.vala new file mode 100644 index 00000000..e6dc7e8f --- /dev/null +++ b/plugins/rtp/src/voice_processor.vala @@ -0,0 +1,176 @@ +using Gst; + +namespace Dino.Plugins.Rtp { +public static extern Buffer adjust_to_running_time(Base.Transform transform, Buffer buf); +} + +public class Dino.Plugins.Rtp.EchoProbe : Audio.Filter { + private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}}; + private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}}; + public Audio.Info audio_info { get; private set; } + public signal void on_new_buffer(Buffer buffer); + private uint period_samples; + private uint period_size; + private Base.Adapter adapter = new Base.Adapter(); + + static construct { + add_static_pad_template(sink_template); + add_static_pad_template(src_template); + set_static_metadata("Acoustic Echo Canceller probe", "Generic/Audio", "Gathers playback buffers for echo cancellation", "Dino Team "); + } + + construct { + set_passthrough(true); + } + + public override bool setup(Audio.Info info) { + audio_info = info; + period_samples = info.rate / 100; // 10ms buffers + period_size = period_samples * info.bpf; + return true; + } + + + public override FlowReturn transform_ip(Buffer buf) { + lock (adapter) { + adapter.push(adjust_to_running_time(this, buf)); + while (adapter.available() > period_size) { + on_new_buffer(adapter.take_buffer(period_size)); + } + } + return FlowReturn.OK; + } + + public override bool stop() { + adapter.clear(); + return true; + } +} + +public class Dino.Plugins.Rtp.VoiceProcessor : Audio.Filter { + private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}}; + private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}}; + public Audio.Info audio_info { get; private set; } + private ulong process_outgoing_buffer_handler_id; + private uint adjust_delay_timeout_id; + private uint period_samples; + private uint period_size; + private Base.Adapter adapter = new Base.Adapter(); + private EchoProbe? echo_probe; + private Audio.StreamVolume? stream_volume; + private ClockTime last_reverse; + private void* native; + + static construct { + add_static_pad_template(sink_template); + add_static_pad_template(src_template); + set_static_metadata("Voice Processor (AGC, AEC, filters, etc.)", "Generic/Audio", "Pre-processes voice with WebRTC Audio Processing Library", "Dino Team "); + } + + construct { + set_passthrough(false); + } + + public VoiceProcessor(EchoProbe? echo_probe = null, Audio.StreamVolume? stream_volume = null) { + this.echo_probe = echo_probe; + this.stream_volume = stream_volume; + } + + private static extern void* init_native(int stream_delay); + private static extern void setup_native(void* native); + private static extern void destroy_native(void* native); + private static extern void analyze_reverse_stream(void* native, Audio.Info info, Buffer buffer); + private static extern void process_stream(void* native, Audio.Info info, Buffer buffer); + private static extern void adjust_stream_delay(void* native); + private static extern void notify_gain_level(void* native, int gain_level); + private static extern int get_suggested_gain_level(void* native); + private static extern bool get_stream_has_voice(void* native); + + public override bool setup(Audio.Info info) { + debug("VoiceProcessor.setup(%s)", info.to_caps().to_string()); + audio_info = info; + period_samples = info.rate / 100; // 10ms buffers + period_size = period_samples * info.bpf; + adapter.clear(); + setup_native(native); + return true; + } + + public override bool start() { + native = init_native(150); + if (process_outgoing_buffer_handler_id == 0 && echo_probe != null) { + process_outgoing_buffer_handler_id = echo_probe.on_new_buffer.connect(process_outgoing_buffer); + } + if (stream_volume == null && sinkpad.get_peer() != null && sinkpad.get_peer().get_parent_element() is Audio.StreamVolume) { + stream_volume = sinkpad.get_peer().get_parent_element() as Audio.StreamVolume; + } + return true; + } + + private bool adjust_delay() { + if (native != null) { + adjust_stream_delay(native); + return Source.CONTINUE; + } else { + adjust_delay_timeout_id = 0; + return Source.REMOVE; + } + } + + private void process_outgoing_buffer(Buffer buffer) { + if (buffer.pts != uint64.MAX) { + last_reverse = buffer.pts; + } + analyze_reverse_stream(native, echo_probe.audio_info, buffer); + if (adjust_delay_timeout_id == 0 && echo_probe != null) { + adjust_delay_timeout_id = Timeout.add(5000, adjust_delay); + } + } + + public override FlowReturn submit_input_buffer(bool is_discont, Buffer input) { + lock (adapter) { + if (is_discont) { + adapter.clear(); + } + adapter.push(adjust_to_running_time(this, input)); + } + return FlowReturn.OK; + } + + public override FlowReturn generate_output(out Buffer output_buffer) { + lock (adapter) { + if (adapter.available() >= period_size) { + output_buffer = (Gst.Buffer) adapter.take_buffer(period_size).make_writable(); + int old_gain_level = 0; + if (stream_volume != null) { + old_gain_level = (int) (stream_volume.get_volume(Audio.StreamVolumeFormat.LINEAR) * 255.0); + notify_gain_level(native, old_gain_level); + } + process_stream(native, audio_info, output_buffer); + if (stream_volume != null) { + int new_gain_level = get_suggested_gain_level(native); + if (old_gain_level != new_gain_level) { + debug("Gain: %i -> %i", old_gain_level, new_gain_level); + stream_volume.set_volume(Audio.StreamVolumeFormat.LINEAR, ((double)new_gain_level) / 255.0); + } + } + } + } + return FlowReturn.OK; + } + + public override bool stop() { + if (process_outgoing_buffer_handler_id != 0) { + echo_probe.disconnect(process_outgoing_buffer_handler_id); + process_outgoing_buffer_handler_id = 0; + } + if (adjust_delay_timeout_id != 0) { + Source.remove(adjust_delay_timeout_id); + adjust_delay_timeout_id = 0; + } + adapter.clear(); + destroy_native(native); + native = null; + return true; + } +} \ No newline at end of file diff --git a/plugins/rtp/src/voice_processor_native.cpp b/plugins/rtp/src/voice_processor_native.cpp new file mode 100644 index 00000000..9b3292b8 --- /dev/null +++ b/plugins/rtp/src/voice_processor_native.cpp @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include + +#define SAMPLE_RATE 48000 +#define SAMPLE_CHANNELS 1 + +struct _DinoPluginsRtpVoiceProcessorNative { + webrtc::AudioProcessing *apm; + gint stream_delay; +}; + +extern "C" void *dino_plugins_rtp_adjust_to_running_time(GstBaseTransform *transform, GstBuffer *buffer) { + GstBuffer *copy = gst_buffer_copy(buffer); + GST_BUFFER_PTS(copy) = gst_segment_to_running_time(&transform->segment, GST_FORMAT_TIME, GST_BUFFER_PTS(buffer)); + return copy; +} + +extern "C" void *dino_plugins_rtp_voice_processor_init_native(gint stream_delay) { + _DinoPluginsRtpVoiceProcessorNative *native = new _DinoPluginsRtpVoiceProcessorNative(); + webrtc::Config config; + config.Set(new webrtc::ExtendedFilter(true)); + config.Set(new webrtc::ExperimentalAgc(true, 85)); + native->apm = webrtc::AudioProcessing::Create(config); + native->stream_delay = stream_delay; + return native; +} + +extern "C" void dino_plugins_rtp_voice_processor_setup_native(void *native_ptr) { + _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; + webrtc::AudioProcessing *apm = native->apm; + webrtc::ProcessingConfig pconfig; + pconfig.streams[webrtc::ProcessingConfig::kInputStream] = + webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false); + pconfig.streams[webrtc::ProcessingConfig::kOutputStream] = + webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false); + pconfig.streams[webrtc::ProcessingConfig::kReverseInputStream] = + webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false); + pconfig.streams[webrtc::ProcessingConfig::kReverseOutputStream] = + webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false); + apm->Initialize(pconfig); + apm->high_pass_filter()->Enable(true); + apm->echo_cancellation()->enable_drift_compensation(false); + apm->echo_cancellation()->set_suppression_level(webrtc::EchoCancellation::kModerateSuppression); + apm->echo_cancellation()->enable_delay_logging(true); + apm->echo_cancellation()->Enable(true); + apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kModerate); + apm->noise_suppression()->Enable(true); + apm->gain_control()->set_analog_level_limits(0, 255); + apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog); + apm->gain_control()->set_target_level_dbfs(3); + apm->gain_control()->set_compression_gain_db(9); + apm->gain_control()->enable_limiter(true); + apm->gain_control()->Enable(true); + apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::Likelihood::kLowLikelihood); + apm->voice_detection()->Enable(true); +} + +extern "C" void +dino_plugins_rtp_voice_processor_analyze_reverse_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) { + _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; + webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false); + webrtc::AudioProcessing *apm = native->apm; + + GstAudioBuffer audio_buffer; + gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READ); + + webrtc::AudioFrame frame; + frame.num_channels_ = info->channels; + frame.sample_rate_hz_ = info->rate; + frame.samples_per_channel_ = gst_buffer_get_size(buffer) / info->bpf; + memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf); + + int err = apm->AnalyzeReverseStream(&frame); + if (err < 0) g_warning("ProcessReverseStream %i", err); + + gst_audio_buffer_unmap(&audio_buffer); +} + +extern "C" void dino_plugins_rtp_voice_processor_notify_gain_level(void *native_ptr, gint gain_level) { + _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; + webrtc::AudioProcessing *apm = native->apm; + apm->gain_control()->set_stream_analog_level(gain_level); +} + +extern "C" gint dino_plugins_rtp_voice_processor_get_suggested_gain_level(void *native_ptr) { + _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; + webrtc::AudioProcessing *apm = native->apm; + return apm->gain_control()->stream_analog_level(); +} + +extern "C" bool dino_plugins_rtp_voice_processor_get_stream_has_voice(void *native_ptr) { + _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; + webrtc::AudioProcessing *apm = native->apm; + return apm->voice_detection()->stream_has_voice(); +} + +extern "C" void dino_plugins_rtp_voice_processor_adjust_stream_delay(void *native_ptr) { + _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; + webrtc::AudioProcessing *apm = native->apm; + int median, std; + float fraction_poor_delays; + apm->echo_cancellation()->GetDelayMetrics(&median, &std, &fraction_poor_delays); + if (fraction_poor_delays < 0) return; + g_debug("voice_processor_native.cpp: Stream delay metrics: %i %i %f", median, std, fraction_poor_delays); + if (fraction_poor_delays > 0.5) { + native->stream_delay = std::max(0, native->stream_delay + std::min(-10, std::max(median, 10))); + g_debug("Adjusted stream delay %i", native->stream_delay); + } +} + +extern "C" void +dino_plugins_rtp_voice_processor_process_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) { + _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; + webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false); + webrtc::AudioProcessing *apm = native->apm; + + GstAudioBuffer audio_buffer; + gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READWRITE); + + webrtc::AudioFrame frame; + frame.num_channels_ = info->channels; + frame.sample_rate_hz_ = info->rate; + frame.samples_per_channel_ = info->rate / 100; + memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf); + + apm->set_stream_delay_ms(native->stream_delay); + int err = apm->ProcessStream(&frame); + if (err >= 0) memcpy(audio_buffer.planes[0], frame.data_, frame.samples_per_channel_ * info->bpf); + if (err < 0) g_warning("ProcessStream %i", err); + + gst_audio_buffer_unmap(&audio_buffer); +} + +extern "C" void dino_plugins_rtp_voice_processor_destroy_native(void *native_ptr) { + _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; + delete native; +} \ No newline at end of file -- cgit v1.2.3-70-g09d2