Echo Cancellation

author: Marvin W <git@larma.de> 2021-05-01 15:19:05 +0200
committer: Marvin W <git@larma.de> 2021-05-01 15:48:51 +0200
commit: 23ffd37dded3bf872e42d7a00727ab3c4d105a97 (patch)
tree: 86278ca49c2eee8c8c091e70d4a5190c21c57aed /plugins
parent: 6b976cdb6604f6f27b72f7397b38d45dd4f916c6 (diff)
download: dino-23ffd37dded3bf872e42d7a00727ab3c4d105a97.tar.gz
dino-23ffd37dded3bf872e42d7a00727ab3c4d105a97.zip
5 files changed, 356 insertions, 16 deletions
diff --git a/plugins/rtp/CMakeLists.txt b/plugins/rtp/CMakeLists.txt
index 92ec1b97..b19c8a8f 100644
--- a/plugins/rtp/CMakeLists.txt
+++ b/plugins/rtp/CMakeLists.txt
@@ -1,4 +1,5 @@
 find_package(GstRtp REQUIRED)
+find_package(WebRTCAudioProcessing 0.2)
 find_packages(RTP_PACKAGES REQUIRED
     Gee
     GLib
@@ -8,12 +9,26 @@ find_packages(RTP_PACKAGES REQUIRED
     GTK3
     Gst
     GstApp
+    GstAudio
 )
 
 if(Gst_VERSION VERSION_GREATER "1.16")
     set(RTP_DEFINITIONS GST_1_16)
 endif()
 
+if(WebRTCAudioProcessing_FOUND AND NOT WebRTCAudioProcessing_VERSION VERSION_LESS "0.4") # component-wise version compare; plain GREATER is numeric and fails on e.g. "1.0.1"
+    message(WARNING "Ignoring WebRTCAudioProcessing, only versions < 0.4 supported so far")
+    unset(WebRTCAudioProcessing_FOUND)
+endif()
+
+if(WebRTCAudioProcessing_FOUND)
+    set(RTP_DEFINITIONS ${RTP_DEFINITIONS} WITH_VOICE_PROCESSOR)
+    set(RTP_VOICE_PROCESSOR_VALA src/voice_processor.vala)
+    set(RTP_VOICE_PROCESSOR_CXX src/voice_processor_native.cpp)
+else()
+    message(WARNING "WebRTCAudioProcessing not found, build without voice pre-processing!")
+endif()
+
 vala_precompile(RTP_VALA_C
 SOURCES
     src/codec_util.vala
@@ -23,6 +38,7 @@ SOURCES
     src/stream.vala
     src/video_widget.vala
     src/register_plugin.vala
+    ${RTP_VOICE_PROCESSOR_VALA}
 CUSTOM_VAPIS
     ${CMAKE_BINARY_DIR}/exports/crypto-vala.vapi
     ${CMAKE_BINARY_DIR}/exports/xmpp-vala.vapi
@@ -36,8 +52,8 @@ DEFINITIONS
 )
 
 add_definitions(${VALA_CFLAGS} -DG_LOG_DOMAIN="rtp" -I${CMAKE_CURRENT_SOURCE_DIR}/src)
-add_library(rtp SHARED ${RTP_VALA_C})
-target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0)
+add_library(rtp SHARED ${RTP_VALA_C} ${RTP_VOICE_PROCESSOR_CXX})
+target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0 $<$<BOOL:${WebRTCAudioProcessing_FOUND}>:webrtc-audio-processing>) # link the library only when it was found and accepted above
 set_target_properties(rtp PROPERTIES PREFIX "")
 set_target_properties(rtp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/plugins/)
 
diff --git a/plugins/rtp/src/device.vala b/plugins/rtp/src/device.vala
index 785f853a..f8894502 100644
--- a/plugins/rtp/src/device.vala
+++ b/plugins/rtp/src/device.vala
@@ -37,6 +37,7 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
     private Gst.Element dsp;
     private Gst.Element mixer;
     private Gst.Element filter;
+    private Gst.Element rate;
     private int links = 0;
 
     public Device(Plugin plugin, Gst.Device device) {
@@ -132,12 +133,10 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
             pipe.add(filter);
             element.link(filter);
             if (media == "audio" && plugin.echoprobe != null) {
-                dsp = Gst.ElementFactory.make("webrtcdsp", @"dsp_$id");
-                if (dsp != null) {
-                    dsp.@set("probe", plugin.echoprobe.name);
-                    pipe.add(dsp);
-                    filter.link(dsp);
-                }
+                dsp = new VoiceProcessor(plugin.echoprobe, element as Gst.Audio.StreamVolume);
+                dsp.name = @"dsp_$id";
+                pipe.add(dsp);
+                filter.link(dsp);
             }
             tee = Gst.ElementFactory.make("tee", @"tee_$id");
             tee.@set("allow-not-linked", true);
@@ -153,7 +152,11 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
             filter.@set("caps", get_best_caps());
             pipe.add(filter);
             if (plugin.echoprobe != null) {
-                filter.link(plugin.echoprobe);
+                rate = Gst.ElementFactory.make("audiorate", @"rate_$id");
+                rate.@set("tolerance", 100000000);
+                pipe.add(rate);
+                filter.link(rate);
+                rate.link(plugin.echoprobe);
                 plugin.echoprobe.link(element);
             } else {
                 filter.link(element);
@@ -184,14 +187,17 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
             if (filter != null) {
                 filter.set_locked_state(true);
                 filter.set_state(Gst.State.NULL);
-                if (plugin.echoprobe != null) {
-                    filter.unlink(plugin.echoprobe);
-                } else {
-                    filter.unlink(element);
-                }
+                filter.unlink(rate ?? ((Gst.Element)plugin.echoprobe) ?? element);
                 pipe.remove(filter);
                 filter = null;
             }
+            if (rate != null) {
+                rate.set_locked_state(true);
+                rate.set_state(Gst.State.NULL);
+                rate.unlink(plugin.echoprobe);
+                pipe.remove(rate);
+                rate = null;
+            }
             if (plugin.echoprobe != null) {
                 plugin.echoprobe.unlink(element);
             }
diff --git a/plugins/rtp/src/plugin.vala b/plugins/rtp/src/plugin.vala
index d43588b4..e3d5ee41 100644
--- a/plugins/rtp/src/plugin.vala
+++ b/plugins/rtp/src/plugin.vala
@@ -8,7 +8,7 @@ public class Dino.Plugins.Rtp.Plugin : RootInterface, VideoCallPlugin, Object {
     public Gst.DeviceMonitor device_monitor { get; private set; }
     public Gst.Pipeline pipe { get; private set; }
     public Gst.Bin rtpbin { get; private set; }
-    public Gst.Element echoprobe { get; private set; }
+    public EchoProbe echoprobe { get; private set; }
 
     private Gee.List<Stream> streams = new ArrayList<Stream>();
     private Gee.List<Device> devices = new ArrayList<Device>();
@@ -72,7 +72,8 @@ public class Dino.Plugins.Rtp.Plugin : RootInterface, VideoCallPlugin, Object {
         pipe.add(rtpbin);
 
         // Audio echo probe
-        echoprobe = Gst.ElementFactory.make("webrtcechoprobe", "echo-probe");
+//        echoprobe = Gst.ElementFactory.make("webrtcechoprobe", "echo-probe");
+        echoprobe = new EchoProbe();
         if (echoprobe != null) pipe.add(echoprobe);
 
         // Pipeline
diff --git a/plugins/rtp/src/voice_processor.vala b/plugins/rtp/src/voice_processor.vala
new file mode 100644
index 00000000..e6dc7e8f
--- /dev/null
+++ b/plugins/rtp/src/voice_processor.vala
@@ -0,0 +1,176 @@
+using Gst;
+
+namespace Dino.Plugins.Rtp {
+public static extern Buffer adjust_to_running_time(Base.Transform transform, Buffer buf); // implemented in voice_processor_native.cpp as dino_plugins_rtp_adjust_to_running_time(); returns a copy of buf with a rewritten PTS
+}
+
+public class Dino.Plugins.Rtp.EchoProbe : Audio.Filter { // pass-through filter that re-emits the audio flowing through it in 10 ms chunks via on_new_buffer
+    private static StaticPadTemplate sink_pad_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+    private static StaticPadTemplate src_pad_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+    public Audio.Info audio_info { get; private set; }
+    public signal void on_new_buffer(Buffer buffer);
+    private uint frames_per_period;
+    private uint bytes_per_period;
+    private Base.Adapter pending = new Base.Adapter();
+
+    static construct {
+        add_static_pad_template(sink_pad_template);
+        add_static_pad_template(src_pad_template);
+        set_static_metadata("Acoustic Echo Canceller probe", "Generic/Audio", "Gathers playback buffers for echo cancellation", "Dino Team <contact@dino.im>");
+    }
+
+    construct {
+        set_passthrough(true);
+    }
+
+    public override bool setup(Audio.Info info) {
+        audio_info = info;
+        frames_per_period = info.rate / 100; // one period is 10 ms of audio
+        bytes_per_period = frames_per_period * info.bpf;
+        return true;
+    }
+
+    public override FlowReturn transform_ip(Buffer buf) {
+        lock (pending) {
+            pending.push(adjust_to_running_time(this, buf));
+            while (pending.available() > bytes_per_period) {
+                Buffer chunk = pending.take_buffer(bytes_per_period);
+                on_new_buffer(chunk);
+            }
+        }
+        return FlowReturn.OK;
+    }
+
+    public override bool stop() {
+        pending.clear();
+        return true;
+    }
+}
+
+public class Dino.Plugins.Rtp.VoiceProcessor : Audio.Filter { // audio filter that collects input into 10 ms periods and runs each one through the native WebRTC wrapper
+    private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+    private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+    public Audio.Info audio_info { get; private set; }
+    private ulong process_outgoing_buffer_handler_id; // id of the echo_probe.on_new_buffer connection; 0 while not connected
+    private uint adjust_delay_timeout_id; // GLib source id of the adjust_delay() timeout; 0 while not scheduled
+    private uint period_samples; // frames per 10 ms period, set in setup()
+    private uint period_size; // bytes per 10 ms period (period_samples * info.bpf)
+    private Base.Adapter adapter = new Base.Adapter(); // input queue; submit_input_buffer() and generate_output() access it under lock (adapter)
+    private EchoProbe? echo_probe;
+    private Audio.StreamVolume? stream_volume;
+    private ClockTime last_reverse; // PTS of the last probe buffer that had one; written in process_outgoing_buffer(), not read in this class
+    private void* native; // opaque handle from init_native(); set in start(), reset to null in stop()
+
+    static construct {
+        add_static_pad_template(sink_template);
+        add_static_pad_template(src_template);
+        set_static_metadata("Voice Processor (AGC, AEC, filters, etc.)", "Generic/Audio", "Pre-processes voice with WebRTC Audio Processing Library", "Dino Team <contact@dino.im>");
+    }
+
+    construct {
+        set_passthrough(false);
+    }
+
+    public VoiceProcessor(EchoProbe? echo_probe = null, Audio.StreamVolume? stream_volume = null) {
+        this.echo_probe = echo_probe;
+        this.stream_volume = stream_volume;
+    }
+
+    private static extern void* init_native(int stream_delay); // this and the externs below are implemented in voice_processor_native.cpp
+    private static extern void setup_native(void* native);
+    private static extern void destroy_native(void* native);
+    private static extern void analyze_reverse_stream(void* native, Audio.Info info, Buffer buffer);
+    private static extern void process_stream(void* native, Audio.Info info, Buffer buffer);
+    private static extern void adjust_stream_delay(void* native);
+    private static extern void notify_gain_level(void* native, int gain_level);
+    private static extern int get_suggested_gain_level(void* native);
+    private static extern bool get_stream_has_voice(void* native); // declared here but not called anywhere in this class
+
+    public override bool setup(Audio.Info info) {
+        debug("VoiceProcessor.setup(%s)", info.to_caps().to_string());
+        audio_info = info;
+        period_samples = info.rate / 100; // 10ms buffers
+        period_size = period_samples * info.bpf;
+        adapter.clear(); // NOTE(review): cleared without lock (adapter), unlike the buffer callbacks below
+        setup_native(native);
+        return true;
+    }
+
+    public override bool start() {
+        native = init_native(150); // 150 is the initial stream delay; the native side passes it to set_stream_delay_ms()
+        if (process_outgoing_buffer_handler_id == 0 && echo_probe != null) {
+            process_outgoing_buffer_handler_id = echo_probe.on_new_buffer.connect(process_outgoing_buffer);
+        }
+        if (stream_volume == null && sinkpad.get_peer() != null && sinkpad.get_peer().get_parent_element() is Audio.StreamVolume) { // no volume control given: use the upstream element if it implements StreamVolume
+            stream_volume = sinkpad.get_peer().get_parent_element() as Audio.StreamVolume;
+        }
+        return true;
+    }
+
+    private bool adjust_delay() { // timeout callback: keeps running while native is set, otherwise removes itself
+        if (native != null) {
+            adjust_stream_delay(native);
+            return Source.CONTINUE;
+        } else {
+            adjust_delay_timeout_id = 0;
+            return Source.REMOVE;
+        }
+    }
+
+    private void process_outgoing_buffer(Buffer buffer) { // handler for echo_probe.on_new_buffer: hands each probe buffer to the native reverse-stream analysis
+        if (buffer.pts != uint64.MAX) { // uint64.MAX is the "no timestamp" value (GST_CLOCK_TIME_NONE)
+            last_reverse = buffer.pts;
+        }
+        analyze_reverse_stream(native, echo_probe.audio_info, buffer);
+        if (adjust_delay_timeout_id == 0 && echo_probe != null) { // NOTE(review): echo_probe was already dereferenced on the line above, so this null check comes late
+            adjust_delay_timeout_id = Timeout.add(5000, adjust_delay); // call adjust_delay() every 5000 ms
+        }
+    }
+
+    public override FlowReturn submit_input_buffer(bool is_discont, Buffer input) { // only queues the data; the processing happens in generate_output()
+        lock (adapter) {
+            if (is_discont) {
+                adapter.clear(); // drop whatever was queued before the discontinuity
+            }
+            adapter.push(adjust_to_running_time(this, input));
+        }
+        return FlowReturn.OK;
+    }
+
+    public override FlowReturn generate_output(out Buffer output_buffer) { // outputs one processed period when enough input is queued; output_buffer is not assigned otherwise
+        lock (adapter) {
+            if (adapter.available() >= period_size) {
+                output_buffer = (Gst.Buffer) adapter.take_buffer(period_size).make_writable();
+                int old_gain_level = 0;
+                if (stream_volume != null) {
+                    old_gain_level = (int) (stream_volume.get_volume(Audio.StreamVolumeFormat.LINEAR) * 255.0); // scale linear volume to the 0..255 analog range configured in setup_native
+                    notify_gain_level(native, old_gain_level);
+                }
+                process_stream(native, audio_info, output_buffer);
+                if (stream_volume != null) {
+                    int new_gain_level = get_suggested_gain_level(native);
+                    if (old_gain_level != new_gain_level) {
+                        debug("Gain: %i -> %i", old_gain_level, new_gain_level);
+                        stream_volume.set_volume(Audio.StreamVolumeFormat.LINEAR, ((double)new_gain_level) / 255.0); // apply the level suggested by the native gain control
+                    }
+                }
+            }
+        }
+        return FlowReturn.OK;
+    }
+
+    public override bool stop() { // undoes start(): disconnects the probe handler, removes the timeout, frees the native state
+        if (process_outgoing_buffer_handler_id != 0) {
+            echo_probe.disconnect(process_outgoing_buffer_handler_id);
+            process_outgoing_buffer_handler_id = 0;
+        }
+        if (adjust_delay_timeout_id != 0) {
+            Source.remove(adjust_delay_timeout_id);
+            adjust_delay_timeout_id = 0;
+        }
+        adapter.clear(); // NOTE(review): not under lock (adapter) — confirm no buffer callback can still be running here
+        destroy_native(native);
+        native = null;
+        return true;
+    }
+}
+\ No newline at end of file
diff --git a/plugins/rtp/src/voice_processor_native.cpp b/plugins/rtp/src/voice_processor_native.cpp
new file mode 100644
index 00000000..9b3292b8
--- /dev/null
+++ b/plugins/rtp/src/voice_processor_native.cpp
@@ -0,0 +1,141 @@
+#include <algorithm>
+#include <gst/gst.h>
+#include <gst/audio/audio.h>
+#include <webrtc/modules/audio_processing/include/audio_processing.h>
+#include <webrtc/modules/interface/module_common_types.h>
+#include <webrtc/system_wrappers/include/trace.h>
+
+#define SAMPLE_RATE 48000
+#define SAMPLE_CHANNELS 1
+
+struct _DinoPluginsRtpVoiceProcessorNative { // state behind the opaque pointer returned by dino_plugins_rtp_voice_processor_init_native()
+    webrtc::AudioProcessing *apm; // created with webrtc::AudioProcessing::Create() in init_native
+    gint stream_delay; // passed to set_stream_delay_ms() before every ProcessStream() call
+};
+
+extern "C" void *dino_plugins_rtp_adjust_to_running_time(GstBaseTransform *transform, GstBuffer *buffer) { // returns a copy; `buffer` itself is left untouched
+    GstBuffer *retimed = gst_buffer_copy(buffer);
+    GST_BUFFER_PTS(retimed) = gst_segment_to_running_time(&transform->segment, GST_FORMAT_TIME, GST_BUFFER_PTS(buffer)); // PTS mapped through the transform's segment
+    return retimed;
+}
+
+extern "C" void *dino_plugins_rtp_voice_processor_init_native(gint stream_delay) {
+    webrtc::Config apm_options; // options consumed by AudioProcessing::Create() below
+    apm_options.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
+    apm_options.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, 85));
+    auto *state = new _DinoPluginsRtpVoiceProcessorNative();
+    state->apm = webrtc::AudioProcessing::Create(apm_options);
+    state->stream_delay = stream_delay;
+    return state; // opaque handle for the other dino_plugins_rtp_voice_processor_* functions
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_setup_native(void *native_ptr) {
+    auto *state = static_cast<_DinoPluginsRtpVoiceProcessorNative *>(native_ptr);
+    const webrtc::StreamConfig fixed_format(SAMPLE_RATE, SAMPLE_CHANNELS, false); // the same format is used for all four streams
+    webrtc::ProcessingConfig stream_layout;
+    stream_layout.streams[webrtc::ProcessingConfig::kInputStream] = fixed_format;
+    stream_layout.streams[webrtc::ProcessingConfig::kOutputStream] = fixed_format;
+    stream_layout.streams[webrtc::ProcessingConfig::kReverseInputStream] = fixed_format;
+    stream_layout.streams[webrtc::ProcessingConfig::kReverseOutputStream] = fixed_format;
+    state->apm->Initialize(stream_layout);
+    state->apm->high_pass_filter()->Enable(true);
+    auto *aec = state->apm->echo_cancellation();
+    aec->enable_drift_compensation(false);
+    aec->set_suppression_level(webrtc::EchoCancellation::kModerateSuppression);
+    aec->enable_delay_logging(true);
+    aec->Enable(true);
+    auto *ns = state->apm->noise_suppression();
+    ns->set_level(webrtc::NoiseSuppression::kModerate);
+    ns->Enable(true);
+    auto *agc = state->apm->gain_control();
+    agc->set_analog_level_limits(0, 255);
+    agc->set_mode(webrtc::GainControl::kAdaptiveAnalog);
+    agc->set_target_level_dbfs(3);
+    agc->set_compression_gain_db(9);
+    agc->enable_limiter(true);
+    agc->Enable(true);
+    auto *vad = state->apm->voice_detection();
+    vad->set_likelihood(webrtc::VoiceDetection::Likelihood::kLowLikelihood);
+    vad->Enable(true);
+}
+
+extern "C" void
+dino_plugins_rtp_voice_processor_analyze_reverse_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; // hands one probe buffer to AnalyzeReverseStream(); `buffer` is only read
+    webrtc::AudioProcessing *apm = native->apm;
+
+    GstAudioBuffer audio_buffer;
+    if (!gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READ)) {
+        g_warning("AnalyzeReverseStream: could not map buffer"); // without a mapping there are no sample planes to read
+        return;
+    }
+    webrtc::AudioFrame frame;
+    frame.num_channels_ = info->channels;
+    frame.sample_rate_hz_ = info->rate;
+    frame.samples_per_channel_ = gst_buffer_get_size(buffer) / info->bpf; // frame count derived from the buffer's byte size
+    memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+
+    int err = apm->AnalyzeReverseStream(&frame);
+    if (err < 0) g_warning("AnalyzeReverseStream %i", err); // message now names the call that actually failed
+    gst_audio_buffer_unmap(&audio_buffer);
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_notify_gain_level(void *native_ptr, gint gain_level) {
+    auto *state = static_cast<_DinoPluginsRtpVoiceProcessorNative *>(native_ptr);
+    auto *agc = state->apm->gain_control();
+    agc->set_stream_analog_level(gain_level); // tell the gain control which analog level is currently applied
+}
+
+extern "C" gint dino_plugins_rtp_voice_processor_get_suggested_gain_level(void *native_ptr) {
+    auto *state = static_cast<_DinoPluginsRtpVoiceProcessorNative *>(native_ptr);
+    auto *agc = state->apm->gain_control();
+    return agc->stream_analog_level(); // analog level as reported by the gain control
+}
+
+extern "C" gboolean dino_plugins_rtp_voice_processor_get_stream_has_voice(void *native_ptr) { // gboolean, because that is what Vala's `bool` in the extern declaration maps to
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    webrtc::AudioProcessing *apm = native->apm;
+    return apm->voice_detection()->stream_has_voice() ? TRUE : FALSE;
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_adjust_stream_delay(void *native_ptr) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    webrtc::AudioProcessing *apm = native->apm;
+    int median = 0, std_dev = 0; // named std_dev rather than std to avoid confusion with the std:: namespace used below
+    float fraction_poor_delays = -1; // stays negative (so we bail out) if GetDelayMetrics() does not fill it in
+    apm->echo_cancellation()->GetDelayMetrics(&median, &std_dev, &fraction_poor_delays);
+    if (fraction_poor_delays < 0) return;
+    g_debug("voice_processor_native.cpp: Stream delay metrics: %i %i %f", median, std_dev, fraction_poor_delays);
+    if (fraction_poor_delays > 0.5) {
+        native->stream_delay = std::max(0, native->stream_delay + std::max(-10, std::min(median, 10))); // shift by the median, clamped to [-10, 10] per call, never below 0
+        g_debug("Adjusted stream delay %i", native->stream_delay);
+    }
+}
+
+extern "C" void
+dino_plugins_rtp_voice_processor_process_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; // runs ProcessStream() on one 10 ms period and writes the result back into `buffer`
+    webrtc::AudioProcessing *apm = native->apm;
+
+    GstAudioBuffer audio_buffer;
+    if (!gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READWRITE)) {
+        g_warning("ProcessStream: could not map buffer"); // without a mapping there are no sample planes to touch
+        return;
+    }
+    webrtc::AudioFrame frame;
+    frame.num_channels_ = info->channels;
+    frame.sample_rate_hz_ = info->rate;
+    frame.samples_per_channel_ = info->rate / 100; // 10 ms worth of frames
+    memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+
+    apm->set_stream_delay_ms(native->stream_delay);
+    int err = apm->ProcessStream(&frame);
+    if (err >= 0) memcpy(audio_buffer.planes[0], frame.data_, frame.samples_per_channel_ * info->bpf); // copy back only when processing succeeded
+    else g_warning("ProcessStream %i", err);
+    gst_audio_buffer_unmap(&audio_buffer);
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_destroy_native(void *native_ptr) {
+    auto *native = static_cast<_DinoPluginsRtpVoiceProcessorNative *>(native_ptr);
+    if (native != nullptr) delete native->apm; // Create() returned a raw owning pointer; deleting only the struct leaked it
+    delete native;
+}
+\ No newline at end of file
author	Marvin W <git@larma.de>	2021-05-01 15:19:05 +0200
committer	Marvin W <git@larma.de>	2021-05-01 15:48:51 +0200
commit	23ffd37dded3bf872e42d7a00727ab3c4d105a97 (patch)
tree	86278ca49c2eee8c8c091e70d4a5190c21c57aed /plugins
parent	6b976cdb6604f6f27b72f7397b38d45dd4f916c6 (diff)
download	dino-23ffd37dded3bf872e42d7a00727ab3c4d105a97.tar.gz dino-23ffd37dded3bf872e42d7a00727ab3c4d105a97.zip