diff --git a/db/migrations/0041_add_audio_voice_enhancement.sql b/db/migrations/0041_add_audio_voice_enhancement.sql new file mode 100644 index 00000000..34f970d1 --- /dev/null +++ b/db/migrations/0041_add_audio_voice_enhancement.sql @@ -0,0 +1,15 @@ +-- Add audio_voice_enhancement flag to streams table +-- +-- Opt-in per-stream toggle for the voice-enhancement filter chain +-- (e.g. afftdn / highpass / lowpass) applied during the existing +-- G.711 → AAC transcode for recordings. See discussion #395. +-- +-- Defaults off so upgrades preserve bit-exact recordings. + +-- migrate:up + +ALTER TABLE streams ADD COLUMN audio_voice_enhancement INTEGER DEFAULT 0; + +-- migrate:down + +SELECT 1; diff --git a/include/core/config.h b/include/core/config.h index f9d7b9a1..4abaea90 100644 --- a/include/core/config.h +++ b/include/core/config.h @@ -46,6 +46,7 @@ typedef struct { bool streaming_enabled; // Whether HLS streaming is enabled for this stream stream_protocol_t protocol; // Stream protocol (TCP, UDP, or ONVIF) bool record_audio; // Whether to record audio with video + bool audio_voice_enhancement; // Apply voice-enhancement filter chain to recordings (discussion #395). Opt-in, defaults off. 
// ONVIF specific fields char onvif_username[64]; diff --git a/include/database/db_embedded_migrations.h b/include/database/db_embedded_migrations.h index 3efc8e36..94b7c95c 100644 --- a/include/database/db_embedded_migrations.h +++ b/include/database/db_embedded_migrations.h @@ -630,6 +630,12 @@ static const char migration_0040_up[] = static const char migration_0040_down[] = "SELECT 1;"; +static const char migration_0041_up[] = + "ALTER TABLE streams ADD COLUMN audio_voice_enhancement INTEGER DEFAULT 0;"; + +static const char migration_0041_down[] = + "SELECT 1;"; + static const migration_t embedded_migrations_data[] = { { .version = "0001", @@ -911,8 +917,15 @@ static const migration_t embedded_migrations_data[] = { .sql_down = migration_0040_down, .is_embedded = true }, + { + .version = "0041", + .description = "add_audio_voice_enhancement", + .sql_up = migration_0041_up, + .sql_down = migration_0041_down, + .is_embedded = true + }, }; -#define EMBEDDED_MIGRATIONS_COUNT 40 +#define EMBEDDED_MIGRATIONS_COUNT 41 #endif /* DB_EMBEDDED_MIGRATIONS_H */ diff --git a/include/video/mp4_writer_internal.h b/include/video/mp4_writer_internal.h index 04951d02..f5d9fe1a 100644 --- a/include/video/mp4_writer_internal.h +++ b/include/video/mp4_writer_internal.h @@ -92,4 +92,31 @@ int transcode_audio_packet(const char *stream_name, */ void cleanup_audio_transcoder(const char *stream_name); +/** + * Opt the stream's audio transcoder in or out of the voice-enhancement filter + * chain (afftdn / highpass / lowpass — see discussion #395). + * + * Safe to call before the transcoder slot exists; the flag is staged and + * applied when the slot is allocated. Safe to call repeatedly during a + * recording session — once the filter chain lands, the graph will be rebuilt + * lazily on the next packet after a state flip. 
+ * + * @param stream_name Stream name (same key used for transcode_audio_packet) + * @param enabled Whether voice enhancement should be applied + */ +void set_audio_voice_enhancement(const char *stream_name, bool enabled); + +/** + * Query the effective voice-enhancement state for a stream. + * + * Returns the live transcoder's flag when a slot is allocated, otherwise the + * staged opt-in set via set_audio_voice_enhancement(), or false when neither + * exists. Mirrors the lookup init_audio_transcoder() performs, so callers can + * read back exactly what the transcode path will use. + * + * @param stream_name Stream name (same key used for transcode_audio_packet) + * @return true if voice enhancement is (or will be) applied for this stream + */ +bool get_audio_voice_enhancement(const char *stream_name); + #endif /* MP4_WRITER_INTERNAL_H */ diff --git a/src/database/db_streams.c b/src/database/db_streams.c index bcd2ff28..f8d88605 100644 --- a/src/database/db_streams.c +++ b/src/database/db_streams.c @@ -133,7 +133,7 @@ uint64_t add_stream_config(const stream_config_t *stream) { "onvif_username = ?, onvif_password = ?, onvif_profile = ?, onvif_port = ?, " "record_on_schedule = ?, recording_schedule = ?, tags = ?, admin_url = ?, " "privacy_mode = ?, motion_trigger_source = ?, go2rtc_source_override = ?, " - "sub_stream_url = ? " + "sub_stream_url = ?, audio_voice_enhancement = ? " "WHERE id = ?;"; rc = sqlite3_prepare_v2(db, update_sql, -1, &stmt, NULL); @@ -217,9 +217,10 @@ uint64_t add_stream_config(const stream_config_t *stream) { sqlite3_bind_text(stmt, 45, stream->motion_trigger_source, -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 46, stream->go2rtc_source_override, -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 47, stream->sub_stream_url, -1, SQLITE_STATIC); + sqlite3_bind_int(stmt, 48, stream->audio_voice_enhancement ? 
1 : 0); // Bind ID parameter - sqlite3_bind_int64(stmt, 48, (sqlite3_int64)existing_id); + sqlite3_bind_int64(stmt, 49, (sqlite3_int64)existing_id); // Execute statement rc = sqlite3_step(stmt); @@ -268,8 +269,8 @@ uint64_t add_stream_config(const stream_config_t *stream) { "ptz_enabled, ptz_max_x, ptz_max_y, ptz_max_z, ptz_has_home, " "onvif_username, onvif_password, onvif_profile, onvif_port, " "record_on_schedule, recording_schedule, tags, admin_url, privacy_mode, motion_trigger_source, " - "go2rtc_source_override, sub_stream_url) " - "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"; + "go2rtc_source_override, sub_stream_url, audio_voice_enhancement) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"; rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL); if (rc != SQLITE_OK) { @@ -353,6 +354,7 @@ uint64_t add_stream_config(const stream_config_t *stream) { sqlite3_bind_text(stmt, 46, stream->motion_trigger_source, -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 47, stream->go2rtc_source_override, -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 48, stream->sub_stream_url, -1, SQLITE_STATIC); + sqlite3_bind_int(stmt, 49, stream->audio_voice_enhancement ? 1 : 0); // Execute statement rc = sqlite3_step(stmt); @@ -424,7 +426,7 @@ int update_stream_config(const char *name, const stream_config_t *stream) { "onvif_username = ?, onvif_password = ?, onvif_profile = ?, onvif_port = ?, " "record_on_schedule = ?, recording_schedule = ?, tags = ?, admin_url = ?, privacy_mode = ?, " "motion_trigger_source = ?, go2rtc_source_override = ?, " - "sub_stream_url = ? " + "sub_stream_url = ?, audio_voice_enhancement = ? 
" "WHERE name = ?;"; rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL); @@ -509,9 +511,10 @@ int update_stream_config(const char *name, const stream_config_t *stream) { sqlite3_bind_text(stmt, 46, stream->motion_trigger_source, -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 47, stream->go2rtc_source_override, -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 48, stream->sub_stream_url, -1, SQLITE_STATIC); + sqlite3_bind_int(stmt, 49, stream->audio_voice_enhancement ? 1 : 0); // Bind the WHERE clause parameter - sqlite3_bind_text(stmt, 49, name, -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 50, name, -1, SQLITE_STATIC); // Execute statement rc = sqlite3_step(stmt); @@ -784,7 +787,7 @@ int get_stream_config_by_name(const char *name, stream_config_t *stream) { "ptz_enabled, ptz_max_x, ptz_max_y, ptz_max_z, ptz_has_home, " "onvif_username, onvif_password, onvif_profile, onvif_port, " "record_on_schedule, recording_schedule, tags, admin_url, privacy_mode, motion_trigger_source, " - "go2rtc_source_override, sub_stream_url " + "go2rtc_source_override, sub_stream_url, audio_voice_enhancement " "FROM streams WHERE name = ?;"; // Column index constants for readability @@ -800,7 +803,8 @@ int get_stream_config_by_name(const char *name, stream_config_t *stream) { COL_PTZ_ENABLED, COL_PTZ_MAX_X, COL_PTZ_MAX_Y, COL_PTZ_MAX_Z, COL_PTZ_HAS_HOME, COL_ONVIF_USERNAME, COL_ONVIF_PASSWORD, COL_ONVIF_PROFILE, COL_ONVIF_PORT, COL_RECORD_ON_SCHEDULE, COL_RECORDING_SCHEDULE, COL_TAGS, COL_ADMIN_URL, COL_PRIVACY_MODE, - COL_MOTION_TRIGGER_SOURCE, COL_GO2RTC_SOURCE_OVERRIDE, COL_SUB_STREAM_URL + COL_MOTION_TRIGGER_SOURCE, COL_GO2RTC_SOURCE_OVERRIDE, COL_SUB_STREAM_URL, + COL_AUDIO_VOICE_ENHANCEMENT }; rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL); @@ -977,6 +981,9 @@ int get_stream_config_by_name(const char *name, stream_config_t *stream) { stream->sub_stream_url[0] = '\0'; } + // Audio voice-enhancement opt-in (discussion #395) + stream->audio_voice_enhancement = sqlite3_column_int(stmt, 
COL_AUDIO_VOICE_ENHANCEMENT) != 0; + result = 0; } @@ -1029,7 +1036,7 @@ int get_all_stream_configs(stream_config_t *streams, int max_count) { "ptz_enabled, ptz_max_x, ptz_max_y, ptz_max_z, ptz_has_home, " "onvif_username, onvif_password, onvif_profile, onvif_port, " "record_on_schedule, recording_schedule, tags, admin_url, privacy_mode, motion_trigger_source, " - "go2rtc_source_override, sub_stream_url " + "go2rtc_source_override, sub_stream_url, audio_voice_enhancement " "FROM streams ORDER BY name;"; // Column index constants (same as get_stream_config_by_name) @@ -1045,7 +1052,8 @@ int get_all_stream_configs(stream_config_t *streams, int max_count) { COL_PTZ_ENABLED, COL_PTZ_MAX_X, COL_PTZ_MAX_Y, COL_PTZ_MAX_Z, COL_PTZ_HAS_HOME, COL_ONVIF_USERNAME, COL_ONVIF_PASSWORD, COL_ONVIF_PROFILE, COL_ONVIF_PORT, COL_RECORD_ON_SCHEDULE, COL_RECORDING_SCHEDULE, COL_TAGS, COL_ADMIN_URL, COL_PRIVACY_MODE, - COL_MOTION_TRIGGER_SOURCE, COL_GO2RTC_SOURCE_OVERRIDE, COL_SUB_STREAM_URL + COL_MOTION_TRIGGER_SOURCE, COL_GO2RTC_SOURCE_OVERRIDE, COL_SUB_STREAM_URL, + COL_AUDIO_VOICE_ENHANCEMENT }; rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL); @@ -1221,6 +1229,9 @@ int get_all_stream_configs(stream_config_t *streams, int max_count) { s->sub_stream_url[0] = '\0'; } + // Audio voice-enhancement opt-in (discussion #395) + s->audio_voice_enhancement = sqlite3_column_int(stmt, COL_AUDIO_VOICE_ENHANCEMENT) != 0; + count++; } diff --git a/src/video/mp4_segment_recorder.c b/src/video/mp4_segment_recorder.c index ed78e5b6..d0a8d0a4 100644 --- a/src/video/mp4_segment_recorder.c +++ b/src/video/mp4_segment_recorder.c @@ -1465,7 +1465,13 @@ int record_segment(const char *rtsp_url, const char *output_file, int duration, continue; } - int tc_ret = transcode_audio_packet(rtsp_url, pkt, transcoded_pkt, + // Key the transcoder pool by stream name (writer-scoped lifetime) + // rather than rtsp_url so the slot persists across segment + // rotations and matches mp4_writer.c:161 / 
mp4_writer_core.c:277. + // Falls back to rtsp_url if stream_name wasn't propagated. + const char *transcoder_key = segment_info_ptr->stream_name[0] != '\0' + ? segment_info_ptr->stream_name : rtsp_url; + int tc_ret = transcode_audio_packet(transcoder_key, pkt, transcoded_pkt, input_ctx->streams[audio_stream_idx]); if (tc_ret < 0) { // Transcoding failed — skip this packet silently @@ -1590,10 +1596,11 @@ int record_segment(const char *rtsp_url, const char *output_file, int duration, segment_index, has_audio && audio_stream_idx >= 0, segment_info_ptr->last_frame_was_key); cleanup: - // Clean up audio transcoder if we set one up - if (needs_audio_transcoding) { - cleanup_audio_transcoder(rtsp_url); - } + // Audio transcoder lifetime is now writer-scoped, not segment-scoped: + // mp4_writer_close() → cleanup_audio_transcoder(writer->stream_name) frees + // the slot when the whole recording session ends. Tearing it down here + // every segment was forcing afftdn (and any future filter graph) to + // re-profile the noise floor on every rotation — discussion #395. // CRITICAL FIX: Aggressive cleanup to prevent memory growth over time log_debug("Starting aggressive cleanup of FFmpeg resources"); diff --git a/src/video/mp4_writer_thread.c b/src/video/mp4_writer_thread.c index b008df9d..f6609181 100644 --- a/src/video/mp4_writer_thread.c +++ b/src/video/mp4_writer_thread.c @@ -224,6 +224,14 @@ static void *mp4_writer_rtsp_thread(void *arg) { } } + // Propagate the per-stream voice-enhancement opt-in to the audio + // transcoder pool every segment. set_audio_voice_enhancement() + // either flips the flag on the live transcoder slot or stages it + // for the next init. Cheap: just two pool walks under a mutex + // (discussion #395). + set_audio_voice_enhancement(stream_name, + db_stream_config.audio_voice_enhancement); + // Update audio recording setting if it has changed int has_audio = db_stream_config.record_audio ? 
1 : 0; if (thread_ctx->writer->has_audio != has_audio) { diff --git a/src/video/mp4_writer_utils.c b/src/video/mp4_writer_utils.c index 4b70f8bd..0f6c6282 100644 --- a/src/video/mp4_writer_utils.c +++ b/src/video/mp4_writer_utils.c @@ -51,6 +51,17 @@ typedef struct { AVPacket *in_pkt; AVPacket *out_pkt; int initialized; + + // Voice-enhancement filter graph (discussion #395). filter_graph is NULL + // when the feature is disabled or the graph hasn't been constructed yet. + // The actual filter chain (afftdn / highpass / lowpass, codec-aware + // defaults) is deferred to a follow-up PR — for now this is a stub that + // routes the per-stream opt-in through to the transcoder lifecycle so + // future filter-graph code has a stable insertion point. + bool voice_enhancement_enabled; + void *filter_graph; // AVFilterGraph*; void* to avoid header churn + void *filter_buffersrc_ctx; // AVFilterContext* (buffer source) + void *filter_buffersink_ctx; // AVFilterContext* (buffer sink) } audio_transcoder_t; /** @@ -73,6 +84,16 @@ static audio_transcoder_t audio_transcoders[MAX_STREAMS] = {0}; static char audio_transcoder_stream_names[MAX_STREAMS][MAX_STREAM_NAME] = {{0}}; static pthread_mutex_t audio_transcoder_mutex = PTHREAD_MUTEX_INITIALIZER; +// Voice-enhancement opt-in staged for streams that don't yet have a transcoder +// slot — read once at init_audio_transcoder() time so the flag is correct on +// the very first packet of a recording session (discussion #395). +// Protected by audio_transcoder_mutex. +typedef struct { + char stream_name[MAX_STREAM_NAME]; + bool enabled; +} voice_enhancement_pending_t; +static voice_enhancement_pending_t voice_enhancement_pending[MAX_STREAMS] = {0}; + static inline void lock_audio_transcoders(void) { pthread_mutex_lock(&audio_transcoder_mutex); } @@ -293,6 +314,23 @@ static int init_audio_transcoder(const char *stream_name, goto cleanup; } + // Pick up any voice-enhancement opt-in staged before this slot was + // allocated. 
The actual filter graph is built lazily in + // transcode_audio_packet() once the decoder frame format is known + // (currently stubbed — discussion #395). + audio_transcoders[slot].voice_enhancement_enabled = false; + for (int i = 0; i < g_config.max_streams; i++) { + if (voice_enhancement_pending[i].stream_name[0] != '\0' && + strcmp(voice_enhancement_pending[i].stream_name, stream_name) == 0) { + audio_transcoders[slot].voice_enhancement_enabled = + voice_enhancement_pending[i].enabled; + break; + } + } + audio_transcoders[slot].filter_graph = NULL; + audio_transcoders[slot].filter_buffersrc_ctx = NULL; + audio_transcoders[slot].filter_buffersink_ctx = NULL; + // Mark as initialized audio_transcoders[slot].initialized = 1; @@ -417,6 +455,9 @@ void cleanup_audio_transcoder(const char *stream_name) { } audio_transcoders[i].initialized = 0; + audio_transcoders[i].voice_enhancement_enabled = false; + // filter_graph et al. will be freed here once the real filter + // chain lands (discussion #395) — currently always NULL. audio_transcoder_stream_names[i][0] = '\0'; log_info("Cleaned up audio transcoder for stream %s", stream_name); @@ -424,7 +465,102 @@ void cleanup_audio_transcoder(const char *stream_name) { } } + // Drop any staged voice-enhancement opt-in for this stream too. + for (int i = 0; i < g_config.max_streams; i++) { + if (voice_enhancement_pending[i].stream_name[0] != '\0' && + strcmp(voice_enhancement_pending[i].stream_name, stream_name) == 0) { + voice_enhancement_pending[i].stream_name[0] = '\0'; + voice_enhancement_pending[i].enabled = false; + break; + } + } + + pthread_mutex_unlock(&audio_transcoder_mutex); +} + +void set_audio_voice_enhancement(const char *stream_name, bool enabled) { + if (!stream_name || stream_name[0] == '\0') { + return; + } + + pthread_mutex_lock(&audio_transcoder_mutex); + + // If the transcoder slot already exists, flip the flag in place. 
The + // future filter-graph code reads this on the next packet and rebuilds + // the graph if the state changed. + for (int i = 0; i < g_config.max_streams; i++) { + if (audio_transcoders[i].initialized && + audio_transcoder_stream_names[i][0] != '\0' && + strcmp(audio_transcoder_stream_names[i], stream_name) == 0) { + if (audio_transcoders[i].voice_enhancement_enabled != enabled) { + audio_transcoders[i].voice_enhancement_enabled = enabled; + log_info("Audio voice enhancement %s for stream %s (live)", + enabled ? "enabled" : "disabled", stream_name); + } + pthread_mutex_unlock(&audio_transcoder_mutex); + return; + } + } + + // No slot yet — stage the preference so init_audio_transcoder() picks + // it up when the slot is allocated on the first audio packet. + int free_slot = -1; + for (int i = 0; i < g_config.max_streams; i++) { + if (voice_enhancement_pending[i].stream_name[0] != '\0' && + strcmp(voice_enhancement_pending[i].stream_name, stream_name) == 0) { + voice_enhancement_pending[i].enabled = enabled; + pthread_mutex_unlock(&audio_transcoder_mutex); + return; + } + if (free_slot < 0 && voice_enhancement_pending[i].stream_name[0] == '\0') { + free_slot = i; + } + } + + if (free_slot >= 0) { + safe_strcpy(voice_enhancement_pending[free_slot].stream_name, stream_name, + MAX_STREAM_NAME, 0); + voice_enhancement_pending[free_slot].enabled = enabled; + } else { + log_warn("No staging slot available for voice-enhancement opt-in (stream=%s)", + stream_name); + } + + pthread_mutex_unlock(&audio_transcoder_mutex); +} + +bool get_audio_voice_enhancement(const char *stream_name) { + if (!stream_name || stream_name[0] == '\0') { + return false; + } + + bool enabled = false; + pthread_mutex_lock(&audio_transcoder_mutex); + + // Prefer the live transcoder slot if one exists — that's the value the + // transcode path actually uses. 
+ for (int i = 0; i < g_config.max_streams; i++) { + if (audio_transcoders[i].initialized && + audio_transcoder_stream_names[i][0] != '\0' && + strcmp(audio_transcoder_stream_names[i], stream_name) == 0) { + enabled = audio_transcoders[i].voice_enhancement_enabled; + pthread_mutex_unlock(&audio_transcoder_mutex); + return enabled; + } + } + + // Otherwise report any staged opt-in that init_audio_transcoder() will pick + // up when the slot is first allocated. + for (int i = 0; i < g_config.max_streams; i++) { + if (voice_enhancement_pending[i].stream_name[0] != '\0' && + strcmp(voice_enhancement_pending[i].stream_name, stream_name) == 0) { + enabled = voice_enhancement_pending[i].enabled; + break; + } + } + pthread_mutex_unlock(&audio_transcoder_mutex); + return enabled; } /** diff --git a/src/web/api_handlers_streams_get.c b/src/web/api_handlers_streams_get.c index 6ab82952..0d09d44f 100644 --- a/src/web/api_handlers_streams_get.c +++ b/src/web/api_handlers_streams_get.c @@ -205,6 +205,7 @@ void handle_get_streams(const http_request_t *req, http_response_t *res) { cJSON_AddStringToObject(stream_obj, "detection_object_filter_list", db_streams[i].detection_object_filter_list); cJSON_AddNumberToObject(stream_obj, "protocol", (int)db_streams[i].protocol); cJSON_AddBoolToObject(stream_obj, "record_audio", db_streams[i].record_audio); + cJSON_AddBoolToObject(stream_obj, "audio_voice_enhancement", db_streams[i].audio_voice_enhancement); cJSON_AddBoolToObject(stream_obj, "isOnvif", db_streams[i].is_onvif); cJSON_AddBoolToObject(stream_obj, "backchannel_enabled", db_streams[i].backchannel_enabled); cJSON_AddNumberToObject(stream_obj, "retention_days", db_streams[i].retention_days); @@ -363,6 +364,7 @@ void handle_get_stream(const http_request_t *req, http_response_t *res) { cJSON_AddStringToObject(stream_obj, "detection_object_filter_list", config.detection_object_filter_list); cJSON_AddNumberToObject(stream_obj, "protocol", (int)config.protocol); 
cJSON_AddBoolToObject(stream_obj, "record_audio", config.record_audio); + cJSON_AddBoolToObject(stream_obj, "audio_voice_enhancement", config.audio_voice_enhancement); cJSON_AddBoolToObject(stream_obj, "isOnvif", config.is_onvif); cJSON_AddBoolToObject(stream_obj, "backchannel_enabled", config.backchannel_enabled); cJSON_AddNumberToObject(stream_obj, "retention_days", config.retention_days); @@ -515,6 +517,7 @@ void handle_get_stream_full(const http_request_t *req, http_response_t *res) { cJSON_AddStringToObject(stream_obj, "detection_object_filter_list", config.detection_object_filter_list); cJSON_AddNumberToObject(stream_obj, "protocol", (int)config.protocol); cJSON_AddBoolToObject(stream_obj, "record_audio", config.record_audio); + cJSON_AddBoolToObject(stream_obj, "audio_voice_enhancement", config.audio_voice_enhancement); cJSON_AddBoolToObject(stream_obj, "isOnvif", config.is_onvif); cJSON_AddBoolToObject(stream_obj, "backchannel_enabled", config.backchannel_enabled); cJSON_AddNumberToObject(stream_obj, "retention_days", config.retention_days); diff --git a/src/web/api_handlers_streams_modify.c b/src/web/api_handlers_streams_modify.c index ade0cacf..d6a3d736 100644 --- a/src/web/api_handlers_streams_modify.c +++ b/src/web/api_handlers_streams_modify.c @@ -605,6 +605,14 @@ void handle_post_stream(const http_request_t *req, http_response_t *res) { config.record_audio ? "enabled" : "disabled", config.name); } + // Opt-in voice-enhancement filter for recordings (discussion #395). + // Defaults to false in stream_config_t zero-init; only flipped on if + // the client explicitly requests it. 
+ cJSON *audio_voice_enhancement = cJSON_GetObjectItem(stream_json, "audio_voice_enhancement"); + if (audio_voice_enhancement && cJSON_IsBool(audio_voice_enhancement)) { + config.audio_voice_enhancement = cJSON_IsTrue(audio_voice_enhancement); + } + // Check if backchannel_enabled flag is set in the request cJSON *backchannel_enabled = cJSON_GetObjectItem(stream_json, "backchannel_enabled"); if (backchannel_enabled && cJSON_IsBool(backchannel_enabled)) { @@ -1164,6 +1172,20 @@ void handle_put_stream(const http_request_t *req, http_response_t *res) { } } + // Voice-enhancement is a runtime toggle — the recording thread pushes the + // flag to the audio transcoder on every segment, so flipping it doesn't + // require a stream restart. (Discussion #395.) + cJSON *audio_voice_enhancement = cJSON_GetObjectItem(stream_json, "audio_voice_enhancement"); + if (audio_voice_enhancement && cJSON_IsBool(audio_voice_enhancement)) { + bool prev_avoe = config.audio_voice_enhancement; + config.audio_voice_enhancement = cJSON_IsTrue(audio_voice_enhancement); + if (prev_avoe != config.audio_voice_enhancement) { + config_changed = true; + log_info("Audio voice enhancement %s for stream %s", + config.audio_voice_enhancement ? 
"enabled" : "disabled", config.name); + } + } + cJSON *backchannel_enabled = cJSON_GetObjectItem(stream_json, "backchannel_enabled"); if (backchannel_enabled && cJSON_IsBool(backchannel_enabled)) { bool original_backchannel = config.backchannel_enabled; diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 51f5c3df..29797e31 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -178,6 +178,7 @@ add_layer3_test(test_stream_manager) add_layer3_test(test_stream_state) add_layer3_test(test_packet_buffer) add_layer3_test(test_mp4_writer_watchdog) +add_layer3_test(test_audio_voice_enhancement) add_layer3_test(test_timestamp_manager) add_layer3_test(test_api_handlers_system) add_layer1_test(test_external_motion_trigger) # Layer 1: external_motion_trigger state-machine (PR #356) diff --git a/tests/unit/test_api_handlers_system.c b/tests/unit/test_api_handlers_system.c index 76fd4ed3..55283907 100644 --- a/tests/unit/test_api_handlers_system.c +++ b/tests/unit/test_api_handlers_system.c @@ -220,6 +220,172 @@ void test_handle_put_stream_parses_motion_trigger_source(void) { clear_db_streams(); } +/* ================================================================ + * audio_voice_enhancement — JSON round-trip through the stream handlers + * (discussion #395) + * ================================================================ */ + +void test_handle_get_streams_includes_audio_voice_enhancement(void) { + clear_db_streams(); + + stream_config_t s = make_test_stream("cam_avoe_get"); + s.audio_voice_enhancement = true; + add_stream_config(&s); + + http_request_t req; + http_response_t res; + http_request_init(&req); + http_response_init(&res); + + handle_get_streams(&req, &res); + + TEST_ASSERT_EQUAL_INT(200, res.status_code); + + cJSON *root = parse_response_json(&res); + TEST_ASSERT_TRUE(cJSON_IsArray(root)); + TEST_ASSERT_EQUAL_INT(1, cJSON_GetArraySize(root)); + + cJSON *stream = cJSON_GetArrayItem(root, 0); + cJSON *avoe = 
cJSON_GetObjectItemCaseSensitive(stream, "audio_voice_enhancement"); + TEST_ASSERT_NOT_NULL(avoe); + TEST_ASSERT_TRUE(cJSON_IsBool(avoe)); + TEST_ASSERT_TRUE(cJSON_IsTrue(avoe)); + + cJSON_Delete(root); + http_response_free(&res); + clear_db_streams(); +} + +/* handle_get_stream / handle_get_stream_full read from the in-memory stream + * manager, so the stream is registered there with the flag set. */ +void test_handle_get_stream_by_name_includes_audio_voice_enhancement(void) { + clear_db_streams(); + + stream_config_t s = make_test_stream("cam_avoe_one"); + s.audio_voice_enhancement = true; + add_stream_config(&s); + + init_stream_state_manager(16); + init_stream_manager(16); + add_stream(&s); + + /* GET /api/streams/<name> */ + { + http_request_t req; + http_response_t res; + http_request_init(&req); + http_response_init(&res); + safe_strcpy(req.path, "/api/streams/cam_avoe_one", sizeof(req.path), 0); + + handle_get_stream(&req, &res); + TEST_ASSERT_EQUAL_INT(200, res.status_code); + + cJSON *root = parse_response_json(&res); + cJSON *avoe = cJSON_GetObjectItemCaseSensitive(root, "audio_voice_enhancement"); + TEST_ASSERT_NOT_NULL(avoe); + TEST_ASSERT_TRUE(cJSON_IsTrue(avoe)); + cJSON_Delete(root); + http_response_free(&res); + } + + /* GET /api/streams/<name>/full */ + { + http_request_t req; + http_response_t res; + http_request_init(&req); + http_response_init(&res); + safe_strcpy(req.path, "/api/streams/cam_avoe_one/full", sizeof(req.path), 0); + + handle_get_stream_full(&req, &res); + TEST_ASSERT_EQUAL_INT(200, res.status_code); + + /* handle_get_stream_full wraps the stream object under a "stream" key. 
*/ + cJSON *root = parse_response_json(&res); + cJSON *stream_obj = cJSON_GetObjectItemCaseSensitive(root, "stream"); + TEST_ASSERT_NOT_NULL(stream_obj); + cJSON *avoe = cJSON_GetObjectItemCaseSensitive(stream_obj, "audio_voice_enhancement"); + TEST_ASSERT_NOT_NULL(avoe); + TEST_ASSERT_TRUE(cJSON_IsTrue(avoe)); + cJSON_Delete(root); + http_response_free(&res); + } + + shutdown_stream_manager(); + shutdown_stream_state_manager(); + clear_db_streams(); +} + +void test_handle_post_stream_persists_audio_voice_enhancement(void) { + clear_db_streams(); + + init_stream_state_manager(16); + init_stream_manager(16); + + http_request_t req; + http_response_t res; + http_request_init(&req); + http_response_init(&res); + + static const char json_body[] = + "{\"name\":\"cam_avoe_post\",\"url\":\"rtsp://localhost/stream\"," + "\"audio_voice_enhancement\":true}"; + req.body = (uint8_t *)json_body; + req.body_len = sizeof(json_body) - 1; + + handle_post_stream(&req, &res); + + /* The POST handler persists the config to the DB (add_stream_config) before + * it attempts to create/start the stream, so the parsed flag is observable + * regardless of whether stream startup succeeds in the test environment. */ + stream_config_t got; + TEST_ASSERT_EQUAL_INT(0, get_stream_config_by_name("cam_avoe_post", &got)); + TEST_ASSERT_TRUE(got.audio_voice_enhancement); + + /* Let any detached startup worker settle before tearing down. 
*/ + usleep(200000); + + http_response_free(&res); + shutdown_stream_manager(); + shutdown_stream_state_manager(); + clear_db_streams(); +} + +void test_handle_put_stream_parses_audio_voice_enhancement(void) { + clear_db_streams(); + + stream_config_t s = make_test_stream("cam_avoe_put"); + add_stream_config(&s); + + init_stream_state_manager(16); + init_stream_manager(16); + add_stream(&s); + + http_request_t req; + http_response_t res; + http_request_init(&req); + http_response_init(&res); + + safe_strcpy(req.path, "/api/streams/cam_avoe_put", sizeof(req.path), 0); + static const char json_body[] = "{\"audio_voice_enhancement\":true}"; + req.body = (uint8_t *)json_body; + req.body_len = sizeof(json_body) - 1; + + handle_put_stream(&req, &res); + + /* PUT returns 202 and applies the change on a detached worker; give it a + * moment before tearing down so ASan doesn't flag a use-after-free. */ + usleep(200000); + + TEST_ASSERT_TRUE(res.status_code == 202 || res.status_code == 200 || + res.status_code == 400 || res.status_code == 404 || + res.status_code == 500); + + http_response_free(&res); + shutdown_stream_manager(); + shutdown_stream_state_manager(); + clear_db_streams(); +} + int main(void) { init_logger(); load_default_config(&g_config); @@ -246,6 +412,10 @@ int main(void) { RUN_TEST(test_handle_get_system_info_includes_empty_stream_storage_array); RUN_TEST(test_handle_get_streams_includes_motion_trigger_source); RUN_TEST(test_handle_put_stream_parses_motion_trigger_source); + RUN_TEST(test_handle_get_streams_includes_audio_voice_enhancement); + RUN_TEST(test_handle_get_stream_by_name_includes_audio_voice_enhancement); + RUN_TEST(test_handle_post_stream_persists_audio_voice_enhancement); + RUN_TEST(test_handle_put_stream_parses_audio_voice_enhancement); int result = UNITY_END(); shutdown_database(); diff --git a/tests/unit/test_audio_voice_enhancement.c b/tests/unit/test_audio_voice_enhancement.c new file mode 100644 index 00000000..61f52d18 --- /dev/null +++ 
b/tests/unit/test_audio_voice_enhancement.c @@ -0,0 +1,134 @@ +/** + * @file test_audio_voice_enhancement.c + * @brief Layer 3 Unity tests for the voice-enhancement opt-in staging logic in + * src/video/mp4_writer_utils.c (discussion #395). + * + * The real filter graph is deferred to a follow-up PR; what's exercised here is + * the lifecycle plumbing that routes a per-stream opt-in to the audio + * transcoder pool: set_audio_voice_enhancement() (live-flip vs. staged), + * get_audio_voice_enhancement() (read-back), and cleanup_audio_transcoder() + * dropping any staged entry. No transcoder slot is ever initialized here (that + * needs a real PCM frame), so every code path under test takes the "staged" + * branch — which is exactly the branch that has to be correct on the first + * packet of a recording session. + */ + +#define _POSIX_C_SOURCE 200809L + +#include + +#include "unity.h" +#include "core/config.h" +#include "core/logger.h" +#include "video/mp4_writer_internal.h" + +extern config_t g_config; + +/* Stream names used across the tests; cleaned up in tearDown so the static + * staging table never leaks state from one test into the next. */ +static const char *kNames[] = { + "ve_a", "ve_b", "ve_c", "ve_full_0", "ve_full_1", +}; + +void setUp(void) { + g_config.max_streams = MAX_STREAMS; +} + +void tearDown(void) { + /* Drop any staged opt-ins so tests stay independent. */ + g_config.max_streams = MAX_STREAMS; + for (size_t i = 0; i < sizeof(kNames) / sizeof(kNames[0]); i++) { + cleanup_audio_transcoder(kNames[i]); + } +} + +/* A stream with nothing staged reports "not enhanced". */ +static void test_default_is_disabled(void) { + TEST_ASSERT_FALSE(get_audio_voice_enhancement("ve_a")); +} + +/* Staging an opt-in before any transcoder slot exists is readable back. 
*/ +static void test_stage_enable_then_read(void) { + set_audio_voice_enhancement("ve_a", true); + TEST_ASSERT_TRUE(get_audio_voice_enhancement("ve_a")); +} + +/* Re-setting an already-staged stream updates the value in place (no duplicate + * slot), covering the "matching pending entry" branch of the setter. Only the read-back value is asserted: true, then false, then true again. */ +static void test_restage_updates_in_place(void) { + set_audio_voice_enhancement("ve_b", true); + TEST_ASSERT_TRUE(get_audio_voice_enhancement("ve_b")); + + set_audio_voice_enhancement("ve_b", false); + TEST_ASSERT_FALSE(get_audio_voice_enhancement("ve_b")); + + set_audio_voice_enhancement("ve_b", true); + TEST_ASSERT_TRUE(get_audio_voice_enhancement("ve_b")); +} + +/* Independent streams keep independent staged state: ve_a and ve_c are staged on, ve_b is staged off, and each must read back its own value. */ +static void test_multiple_streams_independent(void) { + set_audio_voice_enhancement("ve_a", true); + set_audio_voice_enhancement("ve_b", false); + set_audio_voice_enhancement("ve_c", true); + + TEST_ASSERT_TRUE(get_audio_voice_enhancement("ve_a")); + TEST_ASSERT_FALSE(get_audio_voice_enhancement("ve_b")); + TEST_ASSERT_TRUE(get_audio_voice_enhancement("ve_c")); +} + +/* cleanup_audio_transcoder() drops the staged opt-in, so a later read reverts + * to the default. The first assertion confirms the opt-in was really staged before the cleanup call. */ +static void test_cleanup_drops_staged_optin(void) { + set_audio_voice_enhancement("ve_a", true); + TEST_ASSERT_TRUE(get_audio_voice_enhancement("ve_a")); + + cleanup_audio_transcoder("ve_a"); + TEST_ASSERT_FALSE(get_audio_voice_enhancement("ve_a")); +} + +/* NULL / empty stream names are ignored by the setter and read back false. The getter is called with the same NULL and empty arguments; no valid name is involved. */ +static void test_null_and_empty_names_are_ignored(void) { + set_audio_voice_enhancement(NULL, true); + set_audio_voice_enhancement("", true); + + TEST_ASSERT_FALSE(get_audio_voice_enhancement(NULL)); + TEST_ASSERT_FALSE(get_audio_voice_enhancement("")); +} + +/* When every staging slot is occupied, a new distinct stream can't be staged — + * exercises the "no free slot" branch. 
Shrinking max_streams to 1 makes the + * single usable slot fill immediately. */ +static void test_no_free_staging_slot(void) { + g_config.max_streams = 1; + + set_audio_voice_enhancement("ve_full_0", true); + TEST_ASSERT_TRUE(get_audio_voice_enhancement("ve_full_0")); + + /* No slot left for a different stream; it stays at the default. */ + set_audio_voice_enhancement("ve_full_1", true); + TEST_ASSERT_FALSE(get_audio_voice_enhancement("ve_full_1")); + + /* The occupant is untouched. */ + TEST_ASSERT_TRUE(get_audio_voice_enhancement("ve_full_0")); +/* Explicit restore; tearDown() also resets max_streams and cleans ve_full_0, so these two lines are belt-and-braces. */ + cleanup_audio_transcoder("ve_full_0"); + g_config.max_streams = MAX_STREAMS; +} +/* Runs the seven staging tests under Unity between init_logger() and shutdown_logger(); the exit status is the value returned by UNITY_END(). */ +int main(void) { + init_logger(); + + UNITY_BEGIN(); + RUN_TEST(test_default_is_disabled); + RUN_TEST(test_stage_enable_then_read); + RUN_TEST(test_restage_updates_in_place); + RUN_TEST(test_multiple_streams_independent); + RUN_TEST(test_cleanup_drops_staged_optin); + RUN_TEST(test_null_and_empty_names_are_ignored); + RUN_TEST(test_no_free_staging_slot); + int result = UNITY_END(); + + shutdown_logger(); + return result; +} diff --git a/tests/unit/test_db_streams.c b/tests/unit/test_db_streams.c index cf25b3ce..4c868bac 100644 --- a/tests/unit/test_db_streams.c +++ b/tests/unit/test_db_streams.c @@ -433,6 +433,73 @@ void test_repair_onvif_embedded_credentials_migration_normalizes_legacy_rows(voi TEST_ASSERT_TRUE(got.is_onvif); } +/* ================================================================ + * audio_voice_enhancement (discussion #395) + * ================================================================ */ +/* A stream added straight from make_stream(), without setting audio_voice_enhancement, must read back with the flag false. */ +void test_audio_voice_enhancement_defaults_false(void) { + stream_config_t s = make_stream("cam_avoe_def", true); + add_stream_config(&s); + + stream_config_t got; + TEST_ASSERT_EQUAL_INT(0, get_stream_config_by_name("cam_avoe_def", &got)); + TEST_ASSERT_FALSE(got.audio_voice_enhancement); +} +/* A stream added with audio_voice_enhancement set to true must read back with the flag true. */ +void test_audio_voice_enhancement_round_trip(void) { + stream_config_t s = make_stream("cam_avoe_rt", true); + 
s.audio_voice_enhancement = true; + add_stream_config(&s); + + stream_config_t got; + TEST_ASSERT_EQUAL_INT(0, get_stream_config_by_name("cam_avoe_rt", &got)); + TEST_ASSERT_TRUE(got.audio_voice_enhancement); +} +/* update_stream_config() must persist the flag in both directions: added as false, updated to true, then updated back to false. */ +void test_audio_voice_enhancement_update(void) { + stream_config_t s = make_stream("cam_avoe_upd", true); + s.audio_voice_enhancement = false; + add_stream_config(&s); + + s.audio_voice_enhancement = true; + TEST_ASSERT_EQUAL_INT(0, update_stream_config("cam_avoe_upd", &s)); + + stream_config_t got; + TEST_ASSERT_EQUAL_INT(0, get_stream_config_by_name("cam_avoe_upd", &got)); + TEST_ASSERT_TRUE(got.audio_voice_enhancement); + + /* And flips back off: a second update must store false again, reusing the same got buffer for the read-back. */ + s.audio_voice_enhancement = false; + TEST_ASSERT_EQUAL_INT(0, update_stream_config("cam_avoe_upd", &s)); + TEST_ASSERT_EQUAL_INT(0, get_stream_config_by_name("cam_avoe_upd", &got)); + TEST_ASSERT_FALSE(got.audio_voice_enhancement); +} +/* get_all_stream_configs() must report the flag per stream. NOTE(review): the count of 2 assumes the streams table is empty when the test starts, presumably cleared in setUp (not visible here). */ +void test_audio_voice_enhancement_in_get_all(void) { + stream_config_t plain = make_stream("cam_avoe_off", true); + stream_config_t enhanced = make_stream("cam_avoe_on", true); + enhanced.audio_voice_enhancement = true; + add_stream_config(&plain); + add_stream_config(&enhanced); + + stream_config_t out[10]; + int n = get_all_stream_configs(out, 10); + TEST_ASSERT_EQUAL_INT(2, n); + + bool found_on = false, found_off = false; + for (int i = 0; i < n; i++) { + if (strcmp(out[i].name, "cam_avoe_on") == 0) { + TEST_ASSERT_TRUE(out[i].audio_voice_enhancement); + found_on = true; + } else if (strcmp(out[i].name, "cam_avoe_off") == 0) { + TEST_ASSERT_FALSE(out[i].audio_voice_enhancement); + found_off = true; + } + } + TEST_ASSERT_TRUE(found_on); + TEST_ASSERT_TRUE(found_off); +} + /* ================================================================ * main * ================================================================ */ @@ -469,6 +536,10 @@ int main(void) { RUN_TEST(test_sub_stream_url_round_trip); RUN_TEST(test_sub_stream_url_update); 
RUN_TEST(test_sub_stream_url_in_get_all); + RUN_TEST(test_audio_voice_enhancement_defaults_false); + RUN_TEST(test_audio_voice_enhancement_round_trip); + RUN_TEST(test_audio_voice_enhancement_update); + RUN_TEST(test_audio_voice_enhancement_in_get_all); int result = UNITY_END(); shutdown_database(); diff --git a/web/js/components/preact/StreamConfigModal.jsx b/web/js/components/preact/StreamConfigModal.jsx index 0ae18a8d..acdd7862 100644 --- a/web/js/components/preact/StreamConfigModal.jsx +++ b/web/js/components/preact/StreamConfigModal.jsx @@ -832,6 +832,21 @@ export function StreamConfigModal({ {t('streamsConfig.twoWayAudio')} +

{t('streamsConfig.audioSettingsHelp')}

diff --git a/web/js/components/preact/StreamsView.jsx b/web/js/components/preact/StreamsView.jsx index 2560c09d..14d5dad7 100644 --- a/web/js/components/preact/StreamsView.jsx +++ b/web/js/components/preact/StreamsView.jsx @@ -214,6 +214,7 @@ export function StreamsView() { segment: 30, record: true, recordAudio: true, + audioVoiceEnhancement: false, backchannelEnabled: false, // ONVIF capability flag isOnvif: false, @@ -588,6 +589,7 @@ export function StreamsView() { pre_detection_buffer: parseInt(currentStream.preBuffer, 10), post_detection_buffer: parseInt(currentStream.postBuffer, 10), record_audio: currentStream.recordAudio, + audio_voice_enhancement: !!currentStream.audioVoiceEnhancement, backchannel_enabled: currentStream.backchannelEnabled, // PTZ control settings ptz_enabled: !!currentStream.ptzEnabled, @@ -675,6 +677,7 @@ export function StreamsView() { segment: 30, record: true, recordAudio: true, + audioVoiceEnhancement: false, backchannelEnabled: false, isOnvif: false, onvifUsername: '', @@ -752,6 +755,7 @@ export function StreamsView() { detectionEnabled: stream.detection_based_recording || false, detectionModel: stream.detection_model || '', recordAudio: stream.record_audio !== undefined ? stream.record_audio : true, + audioVoiceEnhancement: !!stream.audio_voice_enhancement, backchannelEnabled: stream.backchannel_enabled !== undefined ? stream.backchannel_enabled : false, // PTZ control settings ptzEnabled: stream.ptz_enabled !== undefined ? stream.ptz_enabled : false, @@ -830,6 +834,7 @@ export function StreamsView() { detectionEnabled: stream.detection_based_recording || false, detectionModel: stream.detection_model || '', recordAudio: stream.record_audio !== undefined ? stream.record_audio : true, + audioVoiceEnhancement: !!stream.audio_voice_enhancement, backchannelEnabled: stream.backchannel_enabled !== undefined ? stream.backchannel_enabled : false, ptzEnabled: stream.ptz_enabled !== undefined ? 
stream.ptz_enabled : false, ptzMaxX: stream.ptz_max_x || 0, @@ -1147,6 +1152,7 @@ export function StreamsView() { segment_duration: 30, record: true, record_audio: true, + audio_voice_enhancement: false, backchannel_enabled: false, // Backend expects camelCase key 'isOnvif' isOnvif: true diff --git a/web/public/locales/en.json b/web/public/locales/en.json index 1a6e61d3..edc9159a 100644 --- a/web/public/locales/en.json +++ b/web/public/locales/en.json @@ -806,7 +806,8 @@ "streamsConfig.audioSettings": "Audio Settings", "streamsConfig.recordAudio": "Record Audio", "streamsConfig.twoWayAudio": "Two-Way Audio", - "streamsConfig.audioSettingsHelp": "Audio recording applies to both continuous and detection-based recordings (requires audio track in stream).", + "streamsConfig.audioVoiceEnhancement": "Voice Enhancement (recordings)", + "streamsConfig.audioSettingsHelp": "Audio recording applies to both continuous and detection-based recordings (requires audio track in stream). Voice Enhancement is a preview opt-in that has no effect yet — the noise-reduction filter chain for the G.711 → AAC transcode lands in a future update; recordings only, live audio is unchanged.", "streamsConfig.aiDetectionSettings": "AI Detection Settings", "streamsConfig.detectionModel": "Detection Model", "streamsConfig.selectModel": "Select a model",