aboutsummaryrefslogtreecommitdiff
path: root/plugins/rtp/src
diff options
context:
space:
mode:
authorMarvin W <git@larma.de>2021-05-01 15:19:05 +0200
committerMarvin W <git@larma.de>2021-05-01 15:48:51 +0200
commit23ffd37dded3bf872e42d7a00727ab3c4d105a97 (patch)
tree86278ca49c2eee8c8c091e70d4a5190c21c57aed /plugins/rtp/src
parent6b976cdb6604f6f27b72f7397b38d45dd4f916c6 (diff)
downloaddino-23ffd37dded3bf872e42d7a00727ab3c4d105a97.tar.gz
dino-23ffd37dded3bf872e42d7a00727ab3c4d105a97.zip
Echo Cancellation
Diffstat (limited to 'plugins/rtp/src')
-rw-r--r--plugins/rtp/src/device.vala30
-rw-r--r--plugins/rtp/src/plugin.vala5
-rw-r--r--plugins/rtp/src/voice_processor.vala176
-rw-r--r--plugins/rtp/src/voice_processor_native.cpp141
4 files changed, 338 insertions, 14 deletions
diff --git a/plugins/rtp/src/device.vala b/plugins/rtp/src/device.vala
index 785f853a..f8894502 100644
--- a/plugins/rtp/src/device.vala
+++ b/plugins/rtp/src/device.vala
@@ -37,6 +37,7 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
private Gst.Element dsp;
private Gst.Element mixer;
private Gst.Element filter;
+ private Gst.Element rate;
private int links = 0;
public Device(Plugin plugin, Gst.Device device) {
@@ -132,12 +133,10 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
pipe.add(filter);
element.link(filter);
if (media == "audio" && plugin.echoprobe != null) {
- dsp = Gst.ElementFactory.make("webrtcdsp", @"dsp_$id");
- if (dsp != null) {
- dsp.@set("probe", plugin.echoprobe.name);
- pipe.add(dsp);
- filter.link(dsp);
- }
+ dsp = new VoiceProcessor(plugin.echoprobe, element as Gst.Audio.StreamVolume);
+ dsp.name = @"dsp_$id";
+ pipe.add(dsp);
+ filter.link(dsp);
}
tee = Gst.ElementFactory.make("tee", @"tee_$id");
tee.@set("allow-not-linked", true);
@@ -153,7 +152,11 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
filter.@set("caps", get_best_caps());
pipe.add(filter);
if (plugin.echoprobe != null) {
- filter.link(plugin.echoprobe);
+ rate = Gst.ElementFactory.make("audiorate", @"rate_$id");
+ rate.@set("tolerance", 100000000);
+ pipe.add(rate);
+ filter.link(rate);
+ rate.link(plugin.echoprobe);
plugin.echoprobe.link(element);
} else {
filter.link(element);
@@ -184,14 +187,17 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
if (filter != null) {
filter.set_locked_state(true);
filter.set_state(Gst.State.NULL);
- if (plugin.echoprobe != null) {
- filter.unlink(plugin.echoprobe);
- } else {
- filter.unlink(element);
- }
+ filter.unlink(rate ?? ((Gst.Element)plugin.echoprobe) ?? element);
pipe.remove(filter);
filter = null;
}
+ if (rate != null) {
+ rate.set_locked_state(true);
+ rate.set_state(Gst.State.NULL);
+ rate.unlink(plugin.echoprobe);
+ pipe.remove(rate);
+ rate = null;
+ }
if (plugin.echoprobe != null) {
plugin.echoprobe.unlink(element);
}
diff --git a/plugins/rtp/src/plugin.vala b/plugins/rtp/src/plugin.vala
index d43588b4..e3d5ee41 100644
--- a/plugins/rtp/src/plugin.vala
+++ b/plugins/rtp/src/plugin.vala
@@ -8,7 +8,7 @@ public class Dino.Plugins.Rtp.Plugin : RootInterface, VideoCallPlugin, Object {
public Gst.DeviceMonitor device_monitor { get; private set; }
public Gst.Pipeline pipe { get; private set; }
public Gst.Bin rtpbin { get; private set; }
- public Gst.Element echoprobe { get; private set; }
+ public EchoProbe echoprobe { get; private set; }
private Gee.List<Stream> streams = new ArrayList<Stream>();
private Gee.List<Device> devices = new ArrayList<Device>();
@@ -72,7 +72,8 @@ public class Dino.Plugins.Rtp.Plugin : RootInterface, VideoCallPlugin, Object {
pipe.add(rtpbin);
// Audio echo probe
- echoprobe = Gst.ElementFactory.make("webrtcechoprobe", "echo-probe");
+// echoprobe = Gst.ElementFactory.make("webrtcechoprobe", "echo-probe");
+ echoprobe = new EchoProbe();
if (echoprobe != null) pipe.add(echoprobe);
// Pipeline
diff --git a/plugins/rtp/src/voice_processor.vala b/plugins/rtp/src/voice_processor.vala
new file mode 100644
index 00000000..e6dc7e8f
--- /dev/null
+++ b/plugins/rtp/src/voice_processor.vala
@@ -0,0 +1,176 @@
+using Gst;
+
+namespace Dino.Plugins.Rtp {
+public static extern Buffer adjust_to_running_time(Base.Transform transform, Buffer buf);
+}
+
+public class Dino.Plugins.Rtp.EchoProbe : Audio.Filter {
+ private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+ private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+ public Audio.Info audio_info { get; private set; }
+ public signal void on_new_buffer(Buffer buffer);
+ private uint period_samples;
+ private uint period_size;
+ private Base.Adapter adapter = new Base.Adapter();
+
+ static construct {
+ add_static_pad_template(sink_template);
+ add_static_pad_template(src_template);
+ set_static_metadata("Acoustic Echo Canceller probe", "Generic/Audio", "Gathers playback buffers for echo cancellation", "Dino Team <contact@dino.im>");
+ }
+
+ construct {
+ set_passthrough(true);
+ }
+
+ public override bool setup(Audio.Info info) {
+ audio_info = info;
+ period_samples = info.rate / 100; // 10ms buffers
+ period_size = period_samples * info.bpf;
+ return true;
+ }
+
+
+ public override FlowReturn transform_ip(Buffer buf) {
+ lock (adapter) {
+ adapter.push(adjust_to_running_time(this, buf));
+ while (adapter.available() > period_size) {
+ on_new_buffer(adapter.take_buffer(period_size));
+ }
+ }
+ return FlowReturn.OK;
+ }
+
+ public override bool stop() {
+ adapter.clear();
+ return true;
+ }
+}
+
+public class Dino.Plugins.Rtp.VoiceProcessor : Audio.Filter {
+ private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+ private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+ public Audio.Info audio_info { get; private set; }
+ private ulong process_outgoing_buffer_handler_id;
+ private uint adjust_delay_timeout_id;
+ private uint period_samples;
+ private uint period_size;
+ private Base.Adapter adapter = new Base.Adapter();
+ private EchoProbe? echo_probe;
+ private Audio.StreamVolume? stream_volume;
+ private ClockTime last_reverse;
+ private void* native;
+
+ static construct {
+ add_static_pad_template(sink_template);
+ add_static_pad_template(src_template);
+ set_static_metadata("Voice Processor (AGC, AEC, filters, etc.)", "Generic/Audio", "Pre-processes voice with WebRTC Audio Processing Library", "Dino Team <contact@dino.im>");
+ }
+
+ construct {
+ set_passthrough(false);
+ }
+
+ public VoiceProcessor(EchoProbe? echo_probe = null, Audio.StreamVolume? stream_volume = null) {
+ this.echo_probe = echo_probe;
+ this.stream_volume = stream_volume;
+ }
+
+ private static extern void* init_native(int stream_delay);
+ private static extern void setup_native(void* native);
+ private static extern void destroy_native(void* native);
+ private static extern void analyze_reverse_stream(void* native, Audio.Info info, Buffer buffer);
+ private static extern void process_stream(void* native, Audio.Info info, Buffer buffer);
+ private static extern void adjust_stream_delay(void* native);
+ private static extern void notify_gain_level(void* native, int gain_level);
+ private static extern int get_suggested_gain_level(void* native);
+ private static extern bool get_stream_has_voice(void* native);
+
+ public override bool setup(Audio.Info info) {
+ debug("VoiceProcessor.setup(%s)", info.to_caps().to_string());
+ audio_info = info;
+ period_samples = info.rate / 100; // 10ms buffers
+ period_size = period_samples * info.bpf;
+ adapter.clear();
+ setup_native(native);
+ return true;
+ }
+
+ public override bool start() {
+ native = init_native(150);
+ if (process_outgoing_buffer_handler_id == 0 && echo_probe != null) {
+ process_outgoing_buffer_handler_id = echo_probe.on_new_buffer.connect(process_outgoing_buffer);
+ }
+ if (stream_volume == null && sinkpad.get_peer() != null && sinkpad.get_peer().get_parent_element() is Audio.StreamVolume) {
+ stream_volume = sinkpad.get_peer().get_parent_element() as Audio.StreamVolume;
+ }
+ return true;
+ }
+
+ private bool adjust_delay() {
+ if (native != null) {
+ adjust_stream_delay(native);
+ return Source.CONTINUE;
+ } else {
+ adjust_delay_timeout_id = 0;
+ return Source.REMOVE;
+ }
+ }
+
+ private void process_outgoing_buffer(Buffer buffer) {
+ if (buffer.pts != uint64.MAX) {
+ last_reverse = buffer.pts;
+ }
+ analyze_reverse_stream(native, echo_probe.audio_info, buffer);
+ if (adjust_delay_timeout_id == 0 && echo_probe != null) {
+ adjust_delay_timeout_id = Timeout.add(5000, adjust_delay);
+ }
+ }
+
+ public override FlowReturn submit_input_buffer(bool is_discont, Buffer input) {
+ lock (adapter) {
+ if (is_discont) {
+ adapter.clear();
+ }
+ adapter.push(adjust_to_running_time(this, input));
+ }
+ return FlowReturn.OK;
+ }
+
+ public override FlowReturn generate_output(out Buffer output_buffer) {
+ lock (adapter) {
+ if (adapter.available() >= period_size) {
+ output_buffer = (Gst.Buffer) adapter.take_buffer(period_size).make_writable();
+ int old_gain_level = 0;
+ if (stream_volume != null) {
+ old_gain_level = (int) (stream_volume.get_volume(Audio.StreamVolumeFormat.LINEAR) * 255.0);
+ notify_gain_level(native, old_gain_level);
+ }
+ process_stream(native, audio_info, output_buffer);
+ if (stream_volume != null) {
+ int new_gain_level = get_suggested_gain_level(native);
+ if (old_gain_level != new_gain_level) {
+ debug("Gain: %i -> %i", old_gain_level, new_gain_level);
+ stream_volume.set_volume(Audio.StreamVolumeFormat.LINEAR, ((double)new_gain_level) / 255.0);
+ }
+ }
+ }
+ }
+ return FlowReturn.OK;
+ }
+
+ public override bool stop() {
+ if (process_outgoing_buffer_handler_id != 0) {
+ echo_probe.disconnect(process_outgoing_buffer_handler_id);
+ process_outgoing_buffer_handler_id = 0;
+ }
+ if (adjust_delay_timeout_id != 0) {
+ Source.remove(adjust_delay_timeout_id);
+ adjust_delay_timeout_id = 0;
+ }
+ adapter.clear();
+ destroy_native(native);
+ native = null;
+ return true;
+ }
+} \ No newline at end of file
diff --git a/plugins/rtp/src/voice_processor_native.cpp b/plugins/rtp/src/voice_processor_native.cpp
new file mode 100644
index 00000000..9b3292b8
--- /dev/null
+++ b/plugins/rtp/src/voice_processor_native.cpp
@@ -0,0 +1,141 @@
+#include <algorithm>
+#include <gst/gst.h>
+#include <gst/audio/audio.h>
+#include <webrtc/modules/audio_processing/include/audio_processing.h>
+#include <webrtc/modules/interface/module_common_types.h>
+#include <webrtc/system_wrappers/include/trace.h>
+
+#define SAMPLE_RATE 48000
+#define SAMPLE_CHANNELS 1
+
+struct _DinoPluginsRtpVoiceProcessorNative {
+ webrtc::AudioProcessing *apm;
+ gint stream_delay;
+};
+
+extern "C" void *dino_plugins_rtp_adjust_to_running_time(GstBaseTransform *transform, GstBuffer *buffer) {
+ GstBuffer *copy = gst_buffer_copy(buffer);
+ GST_BUFFER_PTS(copy) = gst_segment_to_running_time(&transform->segment, GST_FORMAT_TIME, GST_BUFFER_PTS(buffer));
+ return copy;
+}
+
+extern "C" void *dino_plugins_rtp_voice_processor_init_native(gint stream_delay) {
+ _DinoPluginsRtpVoiceProcessorNative *native = new _DinoPluginsRtpVoiceProcessorNative();
+ webrtc::Config config;
+ config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
+ config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, 85));
+ native->apm = webrtc::AudioProcessing::Create(config);
+ native->stream_delay = stream_delay;
+ return native;
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_setup_native(void *native_ptr) {
+ _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+ webrtc::AudioProcessing *apm = native->apm;
+ webrtc::ProcessingConfig pconfig;
+ pconfig.streams[webrtc::ProcessingConfig::kInputStream] =
+ webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+ pconfig.streams[webrtc::ProcessingConfig::kOutputStream] =
+ webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+ pconfig.streams[webrtc::ProcessingConfig::kReverseInputStream] =
+ webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+ pconfig.streams[webrtc::ProcessingConfig::kReverseOutputStream] =
+ webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+ apm->Initialize(pconfig);
+ apm->high_pass_filter()->Enable(true);
+ apm->echo_cancellation()->enable_drift_compensation(false);
+ apm->echo_cancellation()->set_suppression_level(webrtc::EchoCancellation::kModerateSuppression);
+ apm->echo_cancellation()->enable_delay_logging(true);
+ apm->echo_cancellation()->Enable(true);
+ apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kModerate);
+ apm->noise_suppression()->Enable(true);
+ apm->gain_control()->set_analog_level_limits(0, 255);
+ apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog);
+ apm->gain_control()->set_target_level_dbfs(3);
+ apm->gain_control()->set_compression_gain_db(9);
+ apm->gain_control()->enable_limiter(true);
+ apm->gain_control()->Enable(true);
+ apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::Likelihood::kLowLikelihood);
+ apm->voice_detection()->Enable(true);
+}
+
+extern "C" void
+dino_plugins_rtp_voice_processor_analyze_reverse_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) {
+ _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+ webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+ webrtc::AudioProcessing *apm = native->apm;
+
+ GstAudioBuffer audio_buffer;
+ gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READ);
+
+ webrtc::AudioFrame frame;
+ frame.num_channels_ = info->channels;
+ frame.sample_rate_hz_ = info->rate;
+ frame.samples_per_channel_ = gst_buffer_get_size(buffer) / info->bpf;
+ memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+
+ int err = apm->AnalyzeReverseStream(&frame);
+ if (err < 0) g_warning("ProcessReverseStream %i", err);
+
+ gst_audio_buffer_unmap(&audio_buffer);
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_notify_gain_level(void *native_ptr, gint gain_level) {
+ _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+ webrtc::AudioProcessing *apm = native->apm;
+ apm->gain_control()->set_stream_analog_level(gain_level);
+}
+
+extern "C" gint dino_plugins_rtp_voice_processor_get_suggested_gain_level(void *native_ptr) {
+ _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+ webrtc::AudioProcessing *apm = native->apm;
+ return apm->gain_control()->stream_analog_level();
+}
+
+extern "C" bool dino_plugins_rtp_voice_processor_get_stream_has_voice(void *native_ptr) {
+ _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+ webrtc::AudioProcessing *apm = native->apm;
+ return apm->voice_detection()->stream_has_voice();
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_adjust_stream_delay(void *native_ptr) {
+ _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+ webrtc::AudioProcessing *apm = native->apm;
+ int median, std;
+ float fraction_poor_delays;
+ apm->echo_cancellation()->GetDelayMetrics(&median, &std, &fraction_poor_delays);
+ if (fraction_poor_delays < 0) return;
+ g_debug("voice_processor_native.cpp: Stream delay metrics: %i %i %f", median, std, fraction_poor_delays);
+ if (fraction_poor_delays > 0.5) {
+ native->stream_delay = std::max(0, native->stream_delay + std::min(-10, std::max(median, 10)));
+ g_debug("Adjusted stream delay %i", native->stream_delay);
+ }
+}
+
+extern "C" void
+dino_plugins_rtp_voice_processor_process_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) {
+ _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+ webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+ webrtc::AudioProcessing *apm = native->apm;
+
+ GstAudioBuffer audio_buffer;
+ gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READWRITE);
+
+ webrtc::AudioFrame frame;
+ frame.num_channels_ = info->channels;
+ frame.sample_rate_hz_ = info->rate;
+ frame.samples_per_channel_ = info->rate / 100;
+ memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+
+ apm->set_stream_delay_ms(native->stream_delay);
+ int err = apm->ProcessStream(&frame);
+ if (err >= 0) memcpy(audio_buffer.planes[0], frame.data_, frame.samples_per_channel_ * info->bpf);
+ if (err < 0) g_warning("ProcessStream %i", err);
+
+ gst_audio_buffer_unmap(&audio_buffer);
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_destroy_native(void *native_ptr) {
+ _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+ delete native;
+} \ No newline at end of file