| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | use crate::error::{Error, Result}; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
/// Eight-component summary of the prosodic features measured on an audio buffer.
///
/// The components have a fixed order, which is the order used by
/// [`MarineProsodyVector::to_array`] and [`MarineProsodyVector::from_array`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct MarineProsodyVector {
    /// Mean relative period jitter, i.e. the mean of `|period - ema| / ema` over the
    /// inter-peak intervals accepted by `MarineProsodyConditioner::from_samples`.
    pub jp_mean: f32,

    /// Population standard deviation of the relative period jitter.
    pub jp_std: f32,

    /// Mean relative amplitude jitter, i.e. the mean of `|amp - ema| / ema` over the
    /// accepted peaks.
    pub ja_mean: f32,

    /// Population standard deviation of the relative amplitude jitter.
    pub ja_std: f32,

    /// NOTE(review): the extractor in this file always writes `1.0` here; the intended
    /// meaning of this component is not established by the visible code - confirm.
    pub h_mean: f32,

    /// Salience score. The extractor in this file computes it as
    /// `1 / (1 + jp_mean + ja_mean)`, so lower jitter gives a value closer to `1.0`.
    pub s_mean: f32,

    /// Number of detected peaks per second of audio.
    pub peak_density: f32,

    /// Mean of the squared amplitudes of the accepted peaks.
    pub energy_mean: f32,
}

impl MarineProsodyVector {
    /// Returns the baseline vector: no jitter, no peaks, no energy, with `h_mean` and
    /// `s_mean` both set to `1.0`.
    pub fn zeros() -> Self {
        Self {
            jp_mean: 0.0,
            jp_std: 0.0,
            ja_mean: 0.0,
            ja_std: 0.0,
            h_mean: 1.0,
            s_mean: 1.0,
            peak_density: 0.0,
            energy_mean: 0.0,
        }
    }

    /// Flattens the vector into an array, in field declaration order.
    pub fn to_array(&self) -> [f32; 8] {
        [
            self.jp_mean,
            self.jp_std,
            self.ja_mean,
            self.ja_std,
            self.h_mean,
            self.s_mean,
            self.peak_density,
            self.energy_mean,
        ]
    }

    /// Rebuilds a vector from an array laid out as produced by
    /// [`MarineProsodyVector::to_array`].
    pub fn from_array(arr: [f32; 8]) -> Self {
        let [jp_mean, jp_std, ja_mean, ja_std, h_mean, s_mean, peak_density, energy_mean] = arr;
        Self {
            jp_mean,
            jp_std,
            ja_mean,
            ja_std,
            h_mean,
            s_mean,
            peak_density,
            energy_mean,
        }
    }

    /// Arithmetic mean of the period jitter and the amplitude jitter.
    pub fn combined_jitter(&self) -> f32 {
        (self.jp_mean + self.ja_mean) / 2.0
    }

    /// Heuristic valence estimate in `[-1.0, 1.0]`.
    ///
    /// Computed as `2 * sqrt(energy_mean) / (1 + combined_jitter) - 1`, then clamped:
    /// more energy raises the score, more jitter lowers it.
    ///
    /// Negative or NaN `energy_mean` is treated as zero energy and negative or NaN
    /// combined jitter as zero jitter, so finite inputs always yield a finite result.
    pub fn estimate_valence(&self) -> f32 {
        // `f32::max` returns the non-NaN operand, so this floors at zero and also
        // discards NaN. Keeping the jitter non-negative keeps the denominator >= 1.
        let jitter = self.combined_jitter().max(0.0);
        let jitter_factor = 1.0 / (1.0 + jitter);
        // Guard the square root: sqrt of a negative number is NaN, and `clamp`
        // would pass that NaN straight through.
        let energy_factor = self.energy_mean.max(0.0).sqrt();

        (jitter_factor * energy_factor * 2.0 - 1.0).clamp(-1.0, 1.0)
    }

    /// Heuristic arousal estimate in `[0.0, 1.0]`.
    ///
    /// Average of three factors: `peak_density / 100` (clamped to `[0, 1]`),
    /// `sqrt(energy_mean)`, and `jp_std + ja_std` (clamped to `[0, 1]`); the average
    /// itself is clamped to `[0, 1]`.
    ///
    /// Negative or NaN `energy_mean` is treated as zero energy.
    pub fn estimate_arousal(&self) -> f32 {
        let density_factor = (self.peak_density / 100.0).clamp(0.0, 1.0);
        // Same guard as in `estimate_valence`: never take sqrt of a negative/NaN value.
        let energy_factor = self.energy_mean.max(0.0).sqrt();
        let variance_factor = (self.jp_std + self.ja_std).clamp(0.0, 1.0);

        ((density_factor + energy_factor + variance_factor) / 3.0).clamp(0.0, 1.0)
    }
}

impl Default for MarineProsodyVector {
    /// Same as [`MarineProsodyVector::zeros`].
    fn default() -> Self {
        Self::zeros()
    }
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
/// Extracts a `MarineProsodyVector` from raw audio samples using peak-to-peak
/// period and amplitude jitter.
#[derive(Debug, Clone)]
pub struct MarineProsodyConditioner {
    /// Sample rate in samples per second; used to convert buffer length to seconds.
    sample_rate: u32,
    /// NOTE(review): initialised to `0.02` by `new` but not read anywhere in this
    /// file - presumably a lower jitter threshold; confirm intended use.
    jitter_low: f32,
    /// NOTE(review): initialised to `0.60` by `new` but not read anywhere in this
    /// file - presumably an upper jitter threshold; confirm intended use.
    jitter_high: f32,
    /// Exclusive lower bound, in samples, on an accepted inter-peak interval
    /// (`sample_rate / 4000`).
    min_period: u32,
    /// Exclusive upper bound, in samples, on an accepted inter-peak interval
    /// (`sample_rate / 60`).
    max_period: u32,
    /// Weight of the newest observation in the exponential moving averages of
    /// period and amplitude.
    ema_alpha: f32,
}
| |
|
| | impl MarineProsodyConditioner { |
| | |
| | pub fn new(sample_rate: u32) -> Self { |
| | |
| | let min_period = sample_rate / 4000; |
| | let max_period = sample_rate / 60; |
| |
|
| | Self { |
| | sample_rate, |
| | jitter_low: 0.02, |
| | jitter_high: 0.60, |
| | min_period, |
| | max_period, |
| | ema_alpha: 0.01, |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | pub fn from_samples(&self, samples: &[f32]) -> Result<MarineProsodyVector> { |
| | if samples.is_empty() { |
| | return Err(Error::Audio("Empty audio buffer".into())); |
| | } |
| |
|
| | |
| | let mut peaks: Vec<PeakInfo> = Vec::new(); |
| | let clip_threshold = 1e-3; |
| |
|
| | |
| | for i in 1..samples.len().saturating_sub(1) { |
| | let prev = samples[i - 1].abs(); |
| | let curr = samples[i].abs(); |
| | let next = samples[i + 1].abs(); |
| |
|
| | if curr > prev && curr > next && curr > clip_threshold { |
| | peaks.push(PeakInfo { |
| | index: i, |
| | amplitude: curr, |
| | }); |
| | } |
| | } |
| |
|
| | if peaks.len() < 3 { |
| | |
| | return Ok(MarineProsodyVector::zeros()); |
| | } |
| |
|
| | |
| | let mut periods: Vec<f32> = Vec::new(); |
| | let mut amplitudes: Vec<f32> = Vec::new(); |
| | let mut jp_values: Vec<f32> = Vec::new(); |
| | let mut ja_values: Vec<f32> = Vec::new(); |
| |
|
| | |
| | let mut ema_period = 0.0f32; |
| | let mut ema_amp = 0.0f32; |
| | let mut ema_initialized = false; |
| |
|
| | for i in 1..peaks.len() { |
| | let period = (peaks[i].index - peaks[i - 1].index) as f32; |
| | let amp = peaks[i].amplitude; |
| |
|
| | |
| | if period > self.min_period as f32 && period < self.max_period as f32 { |
| | periods.push(period); |
| | amplitudes.push(amp); |
| |
|
| | if !ema_initialized { |
| | ema_period = period; |
| | ema_amp = amp; |
| | ema_initialized = true; |
| | } else { |
| | |
| | let jp = (period - ema_period).abs() / ema_period; |
| | let ja = (amp - ema_amp).abs() / ema_amp; |
| | jp_values.push(jp); |
| | ja_values.push(ja); |
| |
|
| | |
| | ema_period = self.ema_alpha * period + (1.0 - self.ema_alpha) * ema_period; |
| | ema_amp = self.ema_alpha * amp + (1.0 - self.ema_alpha) * ema_amp; |
| | } |
| | } |
| | } |
| |
|
| | if jp_values.is_empty() { |
| | return Ok(MarineProsodyVector::zeros()); |
| | } |
| |
|
| | |
| | let n = jp_values.len() as f32; |
| | let duration_sec = samples.len() as f32 / self.sample_rate as f32; |
| |
|
| | |
| | let jp_mean = jp_values.iter().sum::<f32>() / n; |
| | let ja_mean = ja_values.iter().sum::<f32>() / n; |
| | let energy_mean = amplitudes.iter().map(|a| a * a).sum::<f32>() / amplitudes.len() as f32; |
| |
|
| | |
| | let jp_var = jp_values.iter().map(|x| (x - jp_mean).powi(2)).sum::<f32>() / n; |
| | let ja_var = ja_values.iter().map(|x| (x - ja_mean).powi(2)).sum::<f32>() / n; |
| | let jp_std = jp_var.sqrt(); |
| | let ja_std = ja_var.sqrt(); |
| |
|
| | |
| | let h_mean = 1.0; |
| |
|
| | |
| | let s_mean = 1.0 / (1.0 + jp_mean + ja_mean); |
| |
|
| | |
| | let peak_density = peaks.len() as f32 / duration_sec; |
| |
|
| | Ok(MarineProsodyVector { |
| | jp_mean, |
| | jp_std, |
| | ja_mean, |
| | ja_std, |
| | h_mean, |
| | s_mean, |
| | peak_density, |
| | energy_mean, |
| | }) |
| | } |
| |
|
| | |
| | |
| | |
| | pub fn validate_tts_output(&self, samples: &[f32]) -> Result<TTSQualityReport> { |
| | let prosody = self.from_samples(samples)?; |
| |
|
| | let mut issues = Vec::new(); |
| |
|
| | |
| | if prosody.jp_mean < 0.005 { |
| | issues.push("Too perfect - sounds robotic (add natural variation)"); |
| | } |
| |
|
| | if prosody.jp_mean > 0.3 { |
| | issues.push("High period jitter - possible artifacts"); |
| | } |
| |
|
| | if prosody.ja_mean > 0.4 { |
| | issues.push("High amplitude jitter - volume inconsistency"); |
| | } |
| |
|
| | if prosody.s_mean < 0.4 { |
| | issues.push("Low salience - audio quality issues"); |
| | } |
| |
|
| | if prosody.peak_density < 10.0 { |
| | issues.push("Low peak density - missing speech energy"); |
| | } |
| |
|
| | let quality_score = prosody.s_mean * 100.0; |
| |
|
| | Ok(TTSQualityReport { |
| | prosody, |
| | quality_score, |
| | issues, |
| | }) |
| | } |
| |
|
| | |
| | pub fn sample_rate(&self) -> u32 { |
| | self.sample_rate |
| | } |
| | } |
| |
|
| | |
/// A single detected peak in a sample buffer.
#[derive(Debug, Clone, Copy, PartialEq)]
struct PeakInfo {
    /// Position of the peak within the sample buffer.
    index: usize,
    /// Absolute value of the sample at `index`.
    amplitude: f32,
}
| |
|
| | |
| | #[derive(Debug, Clone)] |
| | pub struct TTSQualityReport { |
| | |
| | pub prosody: MarineProsodyVector, |
| | |
| | pub quality_score: f32, |
| | |
| | pub issues: Vec<&'static str>, |
| | } |
| |
|
| | impl TTSQualityReport { |
| | |
| | pub fn passes(&self, threshold: f32) -> bool { |
| | self.quality_score >= threshold && self.issues.is_empty() |
| | } |
| | } |
| |
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Converting to an array and back must preserve every component.
    #[test]
    fn test_prosody_vector_array_conversion() {
        let original = MarineProsodyVector {
            jp_mean: 0.1,
            jp_std: 0.05,
            ja_mean: 0.2,
            ja_std: 0.1,
            h_mean: 0.9,
            s_mean: 0.8,
            peak_density: 50.0,
            energy_mean: 0.3,
        };

        let arr = original.to_array();
        let reconstructed = MarineProsodyVector::from_array(arr);

        // Compare the whole struct so a swapped or dropped component is caught.
        assert_eq!(original, reconstructed);
        assert_eq!(arr, [0.1, 0.05, 0.2, 0.1, 0.9, 0.8, 50.0, 0.3]);
    }

    /// An empty buffer is rejected with an error.
    #[test]
    fn test_conditioner_empty_buffer() {
        let conditioner = MarineProsodyConditioner::new(22050);
        let result = conditioner.from_samples(&[]);
        assert!(result.is_err());
    }

    /// Silence contains no peaks, so the baseline vector is returned.
    #[test]
    fn test_conditioner_silence() {
        let conditioner = MarineProsodyConditioner::new(22050);
        let silence = vec![0.0; 1000];
        let prosody = conditioner.from_samples(&silence).unwrap();

        assert_eq!(prosody.peak_density, 0.0);
        assert_eq!(prosody, MarineProsodyVector::zeros());
    }

    /// Low jitter with high energy must score a higher valence than high jitter
    /// with low energy.
    #[test]
    fn test_estimate_valence() {
        let positive = MarineProsodyVector {
            jp_mean: 0.01,
            jp_std: 0.01,
            ja_mean: 0.01,
            ja_std: 0.01,
            h_mean: 1.0,
            s_mean: 0.95,
            peak_density: 100.0,
            energy_mean: 0.8,
        };

        let negative = MarineProsodyVector {
            jp_mean: 0.5,
            jp_std: 0.3,
            ja_mean: 0.4,
            ja_std: 0.2,
            h_mean: 0.7,
            s_mean: 0.4,
            peak_density: 30.0,
            energy_mean: 0.1,
        };

        assert!(positive.estimate_valence() > negative.estimate_valence());
    }
}
| |
|