From 23ffd37dded3bf872e42d7a00727ab3c4d105a97 Mon Sep 17 00:00:00 2001
From: Marvin W
Date: Sat, 1 May 2021 15:19:05 +0200
Subject: Echo Cancellation

---
 plugins/rtp/src/voice_processor_native.cpp | 141 +++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 plugins/rtp/src/voice_processor_native.cpp

(limited to 'plugins/rtp/src/voice_processor_native.cpp')

diff --git a/plugins/rtp/src/voice_processor_native.cpp b/plugins/rtp/src/voice_processor_native.cpp
new file mode 100644
index 00000000..9b3292b8
--- /dev/null
+++ b/plugins/rtp/src/voice_processor_native.cpp
@@ -0,0 +1,141 @@
+#include <algorithm>
+#include <gst/gst.h>
+#include <gst/audio/audio.h>
+#include <gst/base/base.h>
+#include <webrtc/modules/audio_processing/include/audio_processing.h>
+#include <webrtc/modules/interface/module_common_types.h>
+
+#define SAMPLE_RATE 48000
+#define SAMPLE_CHANNELS 1
+
+struct _DinoPluginsRtpVoiceProcessorNative {
+    webrtc::AudioProcessing *apm;
+    gint stream_delay;
+};
+
+extern "C" void *dino_plugins_rtp_adjust_to_running_time(GstBaseTransform *transform, GstBuffer *buffer) {
+    GstBuffer *copy = gst_buffer_copy(buffer);
+    GST_BUFFER_PTS(copy) = gst_segment_to_running_time(&transform->segment, GST_FORMAT_TIME, GST_BUFFER_PTS(buffer));
+    return copy;
+}
+
+extern "C" void *dino_plugins_rtp_voice_processor_init_native(gint stream_delay) {
+    _DinoPluginsRtpVoiceProcessorNative *native = new _DinoPluginsRtpVoiceProcessorNative();
+    webrtc::Config config;
+    config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
+    config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, 85));
+    native->apm = webrtc::AudioProcessing::Create(config);
+    native->stream_delay = stream_delay;
+    return native;
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_setup_native(void *native_ptr) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    webrtc::AudioProcessing *apm = native->apm;
+    webrtc::ProcessingConfig pconfig;
+    pconfig.streams[webrtc::ProcessingConfig::kInputStream] =
+            webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+    pconfig.streams[webrtc::ProcessingConfig::kOutputStream] =
+            webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+    pconfig.streams[webrtc::ProcessingConfig::kReverseInputStream] =
+            webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+    pconfig.streams[webrtc::ProcessingConfig::kReverseOutputStream] =
+            webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+    apm->Initialize(pconfig);
+    apm->high_pass_filter()->Enable(true);
+    apm->echo_cancellation()->enable_drift_compensation(false);
+    apm->echo_cancellation()->set_suppression_level(webrtc::EchoCancellation::kModerateSuppression);
+    apm->echo_cancellation()->enable_delay_logging(true);
+    apm->echo_cancellation()->Enable(true);
+    apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kModerate);
+    apm->noise_suppression()->Enable(true);
+    apm->gain_control()->set_analog_level_limits(0, 255);
+    apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog);
+    apm->gain_control()->set_target_level_dbfs(3);
+    apm->gain_control()->set_compression_gain_db(9);
+    apm->gain_control()->enable_limiter(true);
+    apm->gain_control()->Enable(true);
+    apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::Likelihood::kLowLikelihood);
+    apm->voice_detection()->Enable(true);
+}
+
+extern "C" void
+dino_plugins_rtp_voice_processor_analyze_reverse_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+    webrtc::AudioProcessing *apm = native->apm;
+
+    GstAudioBuffer audio_buffer;
+    gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READ);
+
+    webrtc::AudioFrame frame;
+    frame.num_channels_ = info->channels;
+    frame.sample_rate_hz_ = info->rate;
+    frame.samples_per_channel_ = gst_buffer_get_size(buffer) / info->bpf;
+    memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+
+    int err = apm->AnalyzeReverseStream(&frame);
+    if (err < 0) g_warning("ProcessReverseStream %i", err);
+
+    gst_audio_buffer_unmap(&audio_buffer);
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_notify_gain_level(void *native_ptr, gint gain_level) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    webrtc::AudioProcessing *apm = native->apm;
+    apm->gain_control()->set_stream_analog_level(gain_level);
+}
+
+extern "C" gint dino_plugins_rtp_voice_processor_get_suggested_gain_level(void *native_ptr) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    webrtc::AudioProcessing *apm = native->apm;
+    return apm->gain_control()->stream_analog_level();
+}
+
+extern "C" bool dino_plugins_rtp_voice_processor_get_stream_has_voice(void *native_ptr) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    webrtc::AudioProcessing *apm = native->apm;
+    return apm->voice_detection()->stream_has_voice();
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_adjust_stream_delay(void *native_ptr) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    webrtc::AudioProcessing *apm = native->apm;
+    int median, std;
+    float fraction_poor_delays;
+    apm->echo_cancellation()->GetDelayMetrics(&median, &std, &fraction_poor_delays);
+    if (fraction_poor_delays < 0) return;
+    g_debug("voice_processor_native.cpp: Stream delay metrics: %i %i %f", median, std, fraction_poor_delays);
+    if (fraction_poor_delays > 0.5) {
+        native->stream_delay = std::max(0, native->stream_delay + std::min(-10, std::max(median, 10)));
+        g_debug("Adjusted stream delay %i", native->stream_delay);
+    }
+}
+
+extern "C" void
+dino_plugins_rtp_voice_processor_process_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
+    webrtc::AudioProcessing *apm = native->apm;
+
+    GstAudioBuffer audio_buffer;
+    gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READWRITE);
+
+    webrtc::AudioFrame frame;
+    frame.num_channels_ = info->channels;
+    frame.sample_rate_hz_ = info->rate;
+    frame.samples_per_channel_ = info->rate / 100;
+    memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+
+    apm->set_stream_delay_ms(native->stream_delay);
+    int err = apm->ProcessStream(&frame);
+    if (err >= 0) memcpy(audio_buffer.planes[0], frame.data_, frame.samples_per_channel_ * info->bpf);
+    if (err < 0) g_warning("ProcessStream %i", err);
+
+    gst_audio_buffer_unmap(&audio_buffer);
+}
+
+extern "C" void dino_plugins_rtp_voice_processor_destroy_native(void *native_ptr) {
+    _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
+    delete native;
+}
\ No newline at end of file
--
cgit v1.2.3-70-g09d2
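Editorial note, not part of the patch above: the extern "C" functions it adds are meant to be driven from the Vala VoiceProcessor element. A minimal sketch of the expected call order (init, setup, per-buffer processing, destroy), assuming 48 kHz mono S16 audio and that the object built from voice_processor_native.cpp is linked in, looks like this; the standalone main() exists purely for illustration.

// Hypothetical usage sketch of the exported voice processor API (not part of the patch).
// Assumes 48 kHz mono S16 audio, i.e. one 10 ms frame = 480 samples = 960 bytes.
#include <gst/gst.h>
#include <gst/audio/audio.h>

extern "C" {
void *dino_plugins_rtp_voice_processor_init_native(gint stream_delay);
void dino_plugins_rtp_voice_processor_setup_native(void *native);
void dino_plugins_rtp_voice_processor_process_stream(void *native, GstAudioInfo *info, GstBuffer *buffer);
void dino_plugins_rtp_voice_processor_destroy_native(void *native);
}

int main(int argc, char **argv) {
    gst_init(&argc, &argv);

    GstAudioInfo info;
    gst_audio_info_set_format(&info, GST_AUDIO_FORMAT_S16, 48000, 1, NULL);

    // One 10 ms frame of silence: 480 samples * 2 bytes per sample.
    GstBuffer *buffer = gst_buffer_new_allocate(NULL, 480 * 2, NULL);
    gst_buffer_memset(buffer, 0, 0, 480 * 2);

    void *native = dino_plugins_rtp_voice_processor_init_native(10);
    dino_plugins_rtp_voice_processor_setup_native(native);
    dino_plugins_rtp_voice_processor_process_stream(native, &info, buffer);
    dino_plugins_rtp_voice_processor_destroy_native(native);

    gst_buffer_unref(buffer);
    return 0;
}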
From 0409f554268c0e8f24e23e471a94de4d3a035ff1 Mon Sep 17 00:00:00 2001
From: Marvin W
Date: Sat, 1 May 2021 17:27:55 +0200
Subject: Fix webcam framerate selection

---
 plugins/rtp/src/device.vala                | 33 ++++++++++++++++++++++++++++--
 plugins/rtp/src/plugin.vala                |  6 +-----
 plugins/rtp/src/voice_processor_native.cpp |  6 +++---
 3 files changed, 35 insertions(+), 10 deletions(-)

(limited to 'plugins/rtp/src/voice_processor_native.cpp')

diff --git a/plugins/rtp/src/device.vala b/plugins/rtp/src/device.vala
index 3c650ad6..e25271b1 100644
--- a/plugins/rtp/src/device.vala
+++ b/plugins/rtp/src/device.vala
@@ -87,6 +87,7 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
             return Gst.Caps.from_string("audio/x-raw,rate=48000,channels=1");
         } else if (media == "video" && device.caps.get_size() > 0) {
             int best_index = 0;
+            Value? best_fraction = null;
             int best_fps = 0;
             int best_width = 0;
             int best_height = 0;
@@ -94,7 +95,28 @@
                 unowned Gst.Structure? that = device.caps.get_structure(i);
                 if (!that.has_name("video/x-raw")) continue;
                 int num = 0, den = 0, width = 0, height = 0;
-                if (!that.has_field("framerate") || !that.get_fraction("framerate", out num, out den)) continue;
+                if (!that.has_field("framerate")) continue;
+                Value framerate = that.get_value("framerate");
+                if (framerate.type() == typeof(Gst.Fraction)) {
+                    num = Gst.Value.get_fraction_numerator(framerate);
+                    den = Gst.Value.get_fraction_denominator(framerate);
+                } else if (framerate.type() == typeof(Gst.ValueList)) {
+                    for(uint j = 0; j < Gst.ValueList.get_size(framerate); j++) {
+                        Value fraction = Gst.ValueList.get_value(framerate, j);
+                        int in_num = Gst.Value.get_fraction_numerator(fraction);
+                        int in_den = Gst.Value.get_fraction_denominator(fraction);
+                        int fps = den > 0 ? (num/den) : 0;
+                        int in_fps = in_den > 0 ? (in_num/in_den) : 0;
+                        if (in_fps > fps) {
+                            best_fraction = fraction;
+                            num = in_num;
+                            den = in_den;
+                        }
+                    }
+                } else {
+                    debug("Unknown type for framerate: %s", framerate.type_name());
+                }
+                if (den == 0) continue;
                 if (!that.has_field("width") || !that.get_int("width", out width)) continue;
                 if (!that.has_field("height") || !that.get_int("height", out height)) continue;
                 int fps = num/den;
@@ -105,7 +127,14 @@
                     best_index = i;
                 }
             }
-            return caps_copy_nth(device.caps, best_index);
+            Gst.Caps res = caps_copy_nth(device.caps, best_index);
+            unowned Gst.Structure? that = res.get_structure(0);
+            Value framerate = that.get_value("framerate");
+            if (framerate.type() == typeof(Gst.ValueList)) {
+                that.set_value("framerate", best_fraction);
+            }
+            debug("Selected caps %s", res.to_string());
+            return res;
         } else if (device.caps.get_size() > 0) {
             return caps_copy_nth(device.caps, 0);
         } else {
diff --git a/plugins/rtp/src/plugin.vala b/plugins/rtp/src/plugin.vala
index f575a7d0..19a266b1 100644
--- a/plugins/rtp/src/plugin.vala
+++ b/plugins/rtp/src/plugin.vala
@@ -136,11 +136,7 @@ public class Dino.Plugins.Rtp.Plugin : RootInterface, VideoCallPlugin, Object {
                 pipe.set_state(Gst.State.PLAYING);
                 break;
             case Gst.MessageType.STATE_CHANGED:
-                Gst.State new_state;
-                message.parse_state_changed(null, out new_state, null);
-                if (message.src is Gst.Element) {
-                    debug("%s changed state to %s", ((Gst.Element)message.src).name, new_state.to_string());
-                }
+                // Ignore
                 break;
             case Gst.MessageType.STREAM_STATUS:
                 Gst.StreamStatusType status;
diff --git a/plugins/rtp/src/voice_processor_native.cpp b/plugins/rtp/src/voice_processor_native.cpp
index 9b3292b8..00f719e1 100644
--- a/plugins/rtp/src/voice_processor_native.cpp
+++ b/plugins/rtp/src/voice_processor_native.cpp
@@ -75,7 +75,7 @@ dino_plugins_rtp_voice_processor_analyze_reverse_stream(void *native_ptr, GstAud
     memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
 
     int err = apm->AnalyzeReverseStream(&frame);
-    if (err < 0) g_warning("ProcessReverseStream %i", err);
+    if (err < 0) g_warning("voice_processor_native.cpp: ProcessReverseStream %i", err);
 
     gst_audio_buffer_unmap(&audio_buffer);
 }
@@ -108,7 +108,7 @@ extern "C" void dino_plugins_rtp_voice_processor_adjust_stream_delay(void *nativ
     g_debug("voice_processor_native.cpp: Stream delay metrics: %i %i %f", median, std, fraction_poor_delays);
     if (fraction_poor_delays > 0.5) {
         native->stream_delay = std::max(0, native->stream_delay + std::min(-10, std::max(median, 10)));
-        g_debug("Adjusted stream delay %i", native->stream_delay);
+        g_debug("voice_processor_native.cpp: Adjusted stream delay %i", native->stream_delay);
     }
 }
 
@@ -130,7 +130,7 @@ dino_plugins_rtp_voice_processor_process_stream(void *native_ptr, GstAudioInfo *
     apm->set_stream_delay_ms(native->stream_delay);
     int err = apm->ProcessStream(&frame);
     if (err >= 0) memcpy(audio_buffer.planes[0], frame.data_, frame.samples_per_channel_ * info->bpf);
-    if (err < 0) g_warning("ProcessStream %i", err);
+    if (err < 0) g_warning("voice_processor_native.cpp: ProcessStream %i", err);
 
     gst_audio_buffer_unmap(&audio_buffer);
 }
--
cgit v1.2.3-70-g09d2
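Editorial note, not part of the patch above: the device.vala hunk picks the caps structure with the highest frame rate, reading "framerate" either as a single Gst.Fraction or as a Gst.ValueList of fractions. A rough equivalent of that selection step against the GStreamer C API is sketched below for illustration only; the helper name pick_best_framerate is made up and does not exist in the repository.

// Illustrative C++ re-implementation of the framerate picking logic from device.vala.
#include <gst/gst.h>

static void pick_best_framerate(const GstStructure *s, gint *num, gint *den) {
    const GValue *framerate = gst_structure_get_value(s, "framerate");
    *num = 0;
    *den = 0;
    if (framerate == NULL) return;
    if (GST_VALUE_HOLDS_FRACTION(framerate)) {
        // Single fraction, e.g. 30/1.
        *num = gst_value_get_fraction_numerator(framerate);
        *den = gst_value_get_fraction_denominator(framerate);
    } else if (GST_VALUE_HOLDS_LIST(framerate)) {
        // List of fractions, e.g. { 30/1, 15/1, 5/1 }: keep the highest fps.
        for (guint j = 0; j < gst_value_list_get_size(framerate); j++) {
            const GValue *fraction = gst_value_list_get_value(framerate, j);
            gint in_num = gst_value_get_fraction_numerator(fraction);
            gint in_den = gst_value_get_fraction_denominator(fraction);
            gint fps = *den > 0 ? (*num / *den) : 0;
            gint in_fps = in_den > 0 ? (in_num / in_den) : 0;
            if (in_fps > fps) {
                *num = in_num;
                *den = in_den;
            }
        }
    }
}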
From 8044b546d0ac15d34a3e6499b9c0d55d3d8f9c94 Mon Sep 17 00:00:00 2001
From: Marvin W
Date: Sun, 2 May 2021 00:34:17 +0200
Subject: Support voice processing on GStreamer 1.14

---
 plugins/rtp/CMakeLists.txt                 |  7 +++---
 plugins/rtp/src/voice_processor.vala       |  2 +-
 plugins/rtp/src/voice_processor_native.cpp | 37 ++++++++++++++++++------------
 3 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'plugins/rtp/src/voice_processor_native.cpp')

diff --git a/plugins/rtp/CMakeLists.txt b/plugins/rtp/CMakeLists.txt
index b19c8a8f..52419425 100644
--- a/plugins/rtp/CMakeLists.txt
+++ b/plugins/rtp/CMakeLists.txt
@@ -17,7 +17,7 @@ if(Gst_VERSION VERSION_GREATER "1.16")
 endif()
 
 if(WebRTCAudioProcessing_VERSION GREATER "0.4")
-    message(WARNING "Ignoring WebRTCAudioProcessing, only versions < 0.4 supported so far")
+    message(STATUS "Ignoring WebRTCAudioProcessing, only versions < 0.4 supported so far")
     unset(WebRTCAudioProcessing_FOUND)
 endif()
 
@@ -25,8 +25,9 @@ if(WebRTCAudioProcessing_FOUND)
     set(RTP_DEFINITIONS ${RTP_DEFINITIONS} WITH_VOICE_PROCESSOR)
     set(RTP_VOICE_PROCESSOR_VALA src/voice_processor.vala)
     set(RTP_VOICE_PROCESSOR_CXX src/voice_processor_native.cpp)
+    set(RTP_VOICE_PROCESSOR_LIB webrtc-audio-processing)
 else()
-    message(WARNING "WebRTCAudioProcessing not found, build without voice pre-processing!")
+    message(STATUS "WebRTCAudioProcessing not found, build without voice pre-processing!")
 endif()
 
 vala_precompile(RTP_VALA_C
@@ -53,7 +54,7 @@ DEFINITIONS
 
 add_definitions(${VALA_CFLAGS} -DG_LOG_DOMAIN="rtp" -I${CMAKE_CURRENT_SOURCE_DIR}/src)
 add_library(rtp SHARED ${RTP_VALA_C} ${RTP_VOICE_PROCESSOR_CXX})
-target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0 webrtc-audio-processing)
+target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0 ${RTP_VOICE_PROCESSOR_LIB})
 set_target_properties(rtp PROPERTIES PREFIX "")
 set_target_properties(rtp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/plugins/)
 
diff --git a/plugins/rtp/src/voice_processor.vala b/plugins/rtp/src/voice_processor.vala
index e6dc7e8f..66e95d72 100644
--- a/plugins/rtp/src/voice_processor.vala
+++ b/plugins/rtp/src/voice_processor.vala
@@ -123,7 +123,7 @@ public class Dino.Plugins.Rtp.VoiceProcessor : Audio.Filter {
         }
         analyze_reverse_stream(native, echo_probe.audio_info, buffer);
         if (adjust_delay_timeout_id == 0 && echo_probe != null) {
-            adjust_delay_timeout_id = Timeout.add(5000, adjust_delay);
+            adjust_delay_timeout_id = Timeout.add(1000, adjust_delay);
         }
     }
 
diff --git a/plugins/rtp/src/voice_processor_native.cpp b/plugins/rtp/src/voice_processor_native.cpp
index 00f719e1..8a052cf8 100644
--- a/plugins/rtp/src/voice_processor_native.cpp
+++ b/plugins/rtp/src/voice_processor_native.cpp
@@ -11,6 +11,8 @@
 struct _DinoPluginsRtpVoiceProcessorNative {
     webrtc::AudioProcessing *apm;
     gint stream_delay;
+    gint last_median;
+    gint last_poor_delays;
 };
 
 extern "C" void *dino_plugins_rtp_adjust_to_running_time(GstBaseTransform *transform, GstBuffer *buffer) {
@@ -26,6 +28,8 @@ extern "C" void *dino_plugins_rtp_voice_processor_init_native(gint stream_delay)
     config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, 85));
     native->apm = webrtc::AudioProcessing::Create(config);
     native->stream_delay = stream_delay;
+    native->last_median = 0;
+    native->last_poor_delays = 0;
     return native;
 }
 
@@ -65,19 +69,19 @@ dino_plugins_rtp_voice_processor_analyze_reverse_stream(void *native_ptr, GstAud
     webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
     webrtc::AudioProcessing *apm = native->apm;
 
-    GstAudioBuffer audio_buffer;
-    gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READ);
+    GstMapInfo map;
+    gst_buffer_map(buffer, &map, GST_MAP_READ);
 
     webrtc::AudioFrame frame;
     frame.num_channels_ = info->channels;
     frame.sample_rate_hz_ = info->rate;
     frame.samples_per_channel_ = gst_buffer_get_size(buffer) / info->bpf;
-    memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+    memcpy(frame.data_, map.data, frame.samples_per_channel_ * info->bpf);
 
     int err = apm->AnalyzeReverseStream(&frame);
     if (err < 0) g_warning("voice_processor_native.cpp: ProcessReverseStream %i", err);
 
-    gst_audio_buffer_unmap(&audio_buffer);
+    gst_buffer_unmap(buffer, &map);
 }
 
 extern "C" void dino_plugins_rtp_voice_processor_notify_gain_level(void *native_ptr, gint gain_level) {
@@ -101,14 +105,17 @@ extern "C" bool dino_plugins_rtp_voice_processor_get_stream_has_voice(void *nati
 extern "C" void dino_plugins_rtp_voice_processor_adjust_stream_delay(void *native_ptr) {
     _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
     webrtc::AudioProcessing *apm = native->apm;
-    int median, std;
+    int median, std, poor_delays;
     float fraction_poor_delays;
     apm->echo_cancellation()->GetDelayMetrics(&median, &std, &fraction_poor_delays);
-    if (fraction_poor_delays < 0) return;
-    g_debug("voice_processor_native.cpp: Stream delay metrics: %i %i %f", median, std, fraction_poor_delays);
-    if (fraction_poor_delays > 0.5) {
-        native->stream_delay = std::max(0, native->stream_delay + std::min(-10, std::max(median, 10)));
-        g_debug("voice_processor_native.cpp: Adjusted stream delay %i", native->stream_delay);
+    poor_delays = (int)(fraction_poor_delays * 100.0);
+    if (fraction_poor_delays < 0 || (native->last_median == median && native->last_poor_delays == poor_delays)) return;
+    g_debug("voice_processor_native.cpp: Stream delay metrics: median=%i std=%i poor_delays=%i%%", median, std, poor_delays);
+    native->last_median = median;
+    native->last_poor_delays = poor_delays;
+    if (poor_delays > 90) {
+        native->stream_delay = std::min(std::max(0, native->stream_delay + std::min(48, std::max(median, -48))), 384);
+        g_debug("voice_processor_native.cpp: set stream_delay=%i", native->stream_delay);
     }
 }
 
@@ -118,21 +125,21 @@ dino_plugins_rtp_voice_processor_process_stream(void *native_ptr, GstAudioInfo *
     webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
     webrtc::AudioProcessing *apm = native->apm;
 
-    GstAudioBuffer audio_buffer;
-    gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READWRITE);
+    GstMapInfo map;
+    gst_buffer_map(buffer, &map, GST_MAP_READWRITE);
 
     webrtc::AudioFrame frame;
     frame.num_channels_ = info->channels;
     frame.sample_rate_hz_ = info->rate;
     frame.samples_per_channel_ = info->rate / 100;
-    memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+    memcpy(frame.data_, map.data, frame.samples_per_channel_ * info->bpf);
 
     apm->set_stream_delay_ms(native->stream_delay);
     int err = apm->ProcessStream(&frame);
-    if (err >= 0) memcpy(audio_buffer.planes[0], frame.data_, frame.samples_per_channel_ * info->bpf);
+    if (err >= 0) memcpy(map.data, frame.data_, frame.samples_per_channel_ * info->bpf);
     if (err < 0) g_warning("voice_processor_native.cpp: ProcessStream %i", err);
 
-    gst_audio_buffer_unmap(&audio_buffer);
+    gst_buffer_unmap(buffer, &map);
 }
 
 extern "C" void dino_plugins_rtp_voice_processor_destroy_native(void *native_ptr) {
--
cgit v1.2.3-70-g09d2
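Editorial note, not part of the patch series: the reworked dino_plugins_rtp_voice_processor_adjust_stream_delay() only reacts when more than 90% of the reported delays are poor, moves the configured delay by at most 48 ms per adjustment (the sign follows the reported median), and clamps the result to 0..384 ms. A tiny standalone C++ check of that arithmetic, using the exact expression from the last hunk above:

// Standalone illustration of the delay adjustment in the final version of the patch.
#include <algorithm>
#include <cstdio>

static int adjust(int stream_delay, int median) {
    // Step is clamped to [-48, 48] ms, result is clamped to [0, 384] ms.
    return std::min(std::max(0, stream_delay + std::min(48, std::max(median, -48))), 384);
}

int main() {
    std::printf("%d\n", adjust(10, 120));   // 10 + 48 (capped step)   -> 58
    std::printf("%d\n", adjust(10, -120));  // 10 - 48, floored at 0   -> 0
    std::printf("%d\n", adjust(370, 30));   // 370 + 30, capped at 384 -> 384
    return 0;
}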