From 7862f7c9f3f579889e08d52822dd03ef39ea4660 Mon Sep 17 00:00:00 2001 From: Hermann_Kitio Date: Sun, 26 Apr 2026 20:08:45 +0300 Subject: [PATCH] =?UTF-8?q?Sprint=206b=20=E2=80=94=20Frontend=20audio=20ca?= =?UTF-8?q?pture=20+=20playback=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit feat(audio): pcm-capture-processor.js AudioWorklet (16kHz resample, Int16 LE) feat(hooks): useAudioCapture (getUserMedia + worklet + onChunk base64) feat(hooks): useAudioPlayback (24kHz sequential scheduling, gap-free) feat(hooks): useAudioRecording (chronological buffer, resample 16→24k, WAV export) feat(lib): audio-utils (base64, int16/float32, resample, WAV header) test: 12 audio-utils + 7 useAudioRecording = 238/238 green (+19) --- docs/CHANGELOG.md | 20 +++ public/pcm-capture-processor.js | 80 ++++++++++ .../hooks/__tests__/useAudioRecording.test.ts | 132 ++++++++++++++++ src/features/t2-live/hooks/useAudioCapture.ts | 146 ++++++++++++++++++ .../t2-live/hooks/useAudioPlayback.ts | 132 ++++++++++++++++ .../t2-live/hooks/useAudioRecording.ts | 100 ++++++++++++ src/shared/lib/__tests__/audio-utils.test.ts | 113 ++++++++++++++ src/shared/lib/audio-utils.ts | 139 +++++++++++++++++ 8 files changed, 862 insertions(+) create mode 100644 public/pcm-capture-processor.js create mode 100644 src/features/t2-live/hooks/__tests__/useAudioRecording.test.ts create mode 100644 src/features/t2-live/hooks/useAudioCapture.ts create mode 100644 src/features/t2-live/hooks/useAudioPlayback.ts create mode 100644 src/features/t2-live/hooks/useAudioRecording.ts create mode 100644 src/shared/lib/__tests__/audio-utils.test.ts create mode 100644 src/shared/lib/audio-utils.ts diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index dc1da9f..246fdeb 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -29,6 +29,26 @@ Chaque entrée suit ce format : --- +## [Unreleased] — 2026-04-26 — Sprint 6b — Frontend audio (T2 Live) + +### Added + +- 
`public/pcm-capture-processor.js` — AudioWorklet processor : capture micro, rééchantillonnage vers 16 kHz si `sampleRate` natif différent, conversion Float32 → Int16 LE, chunks de 4096 samples (~256 ms). +- `src/shared/lib/audio-utils.ts` — 6 helpers purs : `arrayBufferToBase64`, `base64ToArrayBuffer`, `int16ToFloat32`, `float32ToInt16`, `resample16kTo24k`, `buildWavHeader`. +- `src/features/t2-live/hooks/useAudioCapture.ts` — hook capture : `getUserMedia` (mono, echoCancellation, noiseSuppression) → AudioContext 16 kHz → AudioWorklet → callback `onChunk(base64)`. Cleanup au stop/unmount. +- `src/features/t2-live/hooks/useAudioPlayback.ts` — hook playback : AudioContext 24 kHz lazy-init, scheduling séquentiel via `start(max(currentTime, lastEndTime))` pour lecture sans gaps. Cleanup au stop/unmount. +- `src/features/t2-live/hooks/useAudioRecording.ts` — hook recording : buffer chronologique unique normalisé 24 kHz (chunks candidat rééchantillonnés 16→24k), `addAIChunk(base64)` décode en interne, `exportWAV()` → Blob `audio/wav` mono 24 kHz. +- 12 tests `audio-utils.test.ts` (round-trips base64/ArrayBuffer, clamping int16/float32, interpolation resample, header WAV). +- 7 tests `useAudioRecording.test.ts` (add candidat resample, add IA, alternance, header WAV, reset, export vide, chunks vides ignorés). + +### Notes + +- Tests frontend : 219 → 238 verts (+19). +- `useAudioCapture` et `useAudioPlayback` dépendent de AudioContext (API navigateur) — validation manuelle au Sprint 6c. +- AudioWorklet utilisé directement (pas ScriptProcessorNode) — FTD-06 ne s'applique plus pour T2 Live. + +--- + ## [Unreleased] — 2026-04-26 — Sprint 5.5 Clean FTD ### Changed diff --git a/public/pcm-capture-processor.js b/public/pcm-capture-processor.js new file mode 100644 index 0000000..3b1a717 --- /dev/null +++ b/public/pcm-capture-processor.js @@ -0,0 +1,80 @@ +/** + * pcm-capture-processor.js — AudioWorklet processor pour T2 Live (Sprint 6b). 
+ *
+ * Captures the microphone at the browser's native `sampleRate` (typically
+ * 48 kHz), resamples to 16 kHz when needed, converts Float32 → Int16
+ * little-endian, and posts chunks of 4096 samples (≈ 256 ms at 16 kHz).
+ *
+ * Output format expected by the Gemini Live API:
+ *   raw PCM, 16 kHz, 16-bit, little-endian, mono.
+ *
+ * Resampling uses linear interpolation, like `resample16kTo24k` in
+ * audio-utils.ts but in the opposite direction. The fractional read position
+ * is carried from one call of `process()` to the next: resampling every input
+ * block on its own would emit floor(len / 3) samples per block at 48 kHz
+ * (42 instead of 42.67 for a 128-frame block), so the stream would come out
+ * slightly too short and drift against wall-clock time.
+ *
+ * Plain JS (not TS): AudioWorklet processors run in an isolated global
+ * scope that cannot import from the TS bundle.
+ */
+
+const TARGET_SAMPLE_RATE = 16000
+const CHUNK_SIZE_16K = 4096 // ≈ 256 ms at 16 kHz
+
+class PcmCaptureProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super()
+    // 16 kHz chunk currently being filled. It is reused for every chunk so
+    // that process() does not allocate a growing buffer on each call.
+    this.chunk = new Float32Array(CHUNK_SIZE_16K)
+    this.chunkFill = 0
+    // Resampler state carried across calls to process():
+    //  - srcPos: fractional read position, in source samples, relative to
+    //    the start of the next input block (always >= -1);
+    //  - prevSample: last sample of the previous block, i.e. source index -1.
+    this.srcPos = 0
+    this.prevSample = 0
+  }
+
+  /** Appends one 16 kHz sample and posts the chunk as soon as it is full. */
+  pushSample(sample) {
+    this.chunk[this.chunkFill++] = sample
+    if (this.chunkFill === CHUNK_SIZE_16K) this.flushChunk()
+  }
+
+  /**
+   * Converts the full chunk from Float32 [-1, 1] to Int16 little-endian and
+   * posts it on the processor port, then starts a new chunk.
+   */
+  flushChunk() {
+    const pcm = new ArrayBuffer(CHUNK_SIZE_16K * 2)
+    const view = new DataView(pcm)
+    for (let i = 0; i < CHUNK_SIZE_16K; i++) {
+      const s = Math.max(-1, Math.min(1, this.chunk[i]))
+      // Rounded (not truncated) to match float32ToInt16 in audio-utils.ts.
+      view.setInt16(i * 2, Math.round(s < 0 ? s * 0x8000 : s * 0x7fff), true)
+    }
+    // `pcm` is listed as transferable: ownership moves to the receiver.
+    this.port.postMessage(pcm, [pcm])
+    this.chunkFill = 0
+  }
+
+  /**
+   * Consumes one block of the first input's first channel (mono) and emits
+   * the corresponding 16 kHz samples. Always returns true to stay alive.
+   */
+  process(inputs) {
+    const input = inputs[0]
+    if (!input || !input[0]) return true
+
+    const channelData = input[0] // mono
+    const len = channelData.length
+    if (len === 0) return true
+
+    // `sampleRate` is a global of the AudioWorklet scope (Web Audio spec).
+    if (sampleRate === TARGET_SAMPLE_RATE) {
+      for (let i = 0; i < len; i++) this.pushSample(channelData[i])
+      return true
+    }
+
+    // Source samples consumed per output sample (3 when going 48 kHz → 16 kHz).
+    const step = sampleRate / TARGET_SAMPLE_RATE
+    let pos = this.srcPos
+    // An output at `pos` interpolates between floor(pos) and floor(pos) + 1,
+    // so it can only be produced while floor(pos) + 1 is inside this block.
+    while (pos < len - 1) {
+      const floor = Math.floor(pos)
+      const frac = pos - floor
+      const a = floor < 0 ? this.prevSample : channelData[floor]
+      const b = channelData[floor + 1]
+      this.pushSample(a * (1 - frac) + b * frac)
+      pos += step
+    }
+    // Re-express the position relative to the next block; pos >= len - 1 here,
+    // hence srcPos >= -1 and index -1 is served by prevSample.
+    this.srcPos = pos - len
+    this.prevSample = channelData[len - 1]
+
+    return true
+  }
+}
+
+registerProcessor('pcm-capture-processor', PcmCaptureProcessor)
diff --git a/src/features/t2-live/hooks/__tests__/useAudioRecording.test.ts b/src/features/t2-live/hooks/__tests__/useAudioRecording.test.ts
new file mode 100644
index 0000000..d2e750b
--- /dev/null
+++ b/src/features/t2-live/hooks/__tests__/useAudioRecording.test.ts
@@ -0,0 +1,132 @@
+import { describe, it, expect } from 'vitest'
+import { act, renderHook } from '@testing-library/react'
+import { useAudioRecording } from '../useAudioRecording'
+import { arrayBufferToBase64 } from '@/shared/lib/audio-utils'
+
+/** Builds an Int16 LE ArrayBuffer from an array of samples. */
+function makePcm16(samples: number[]): ArrayBuffer {
+  return new Int16Array(samples).buffer
+}
+
+/** Builds a base64 PCM 24 kHz Int16 LE payload from an array of samples. 
*/
+function makePcm24Base64(samples: number[]): string {
+  return arrayBufferToBase64(new Int16Array(samples).buffer)
+}
+
+describe('useAudioRecording', () => {
+  it('addCandidateChunk : rééchantillonne 16 → 24 kHz et met à jour durationSeconds', () => {
+    const { result } = renderHook(() => useAudioRecording())
+
+    // 16 samples at 16 kHz = 1 ms → after resampling: 24 samples at 24 kHz = 1 ms
+    act(() => {
+      result.current.addCandidateChunk(makePcm16(new Array(16).fill(1000)))
+    })
+    // 24 samples / 24000 = 0.001 s
+    expect(result.current.durationSeconds).toBeCloseTo(0.001, 4)
+  })
+
+  it('addAIChunk : ajoute le chunk tel quel et met à jour durationSeconds', () => {
+    const { result } = renderHook(() => useAudioRecording())
+
+    // 24 samples at 24 kHz = 1 ms (already at the target sample rate)
+    act(() => {
+      result.current.addAIChunk(makePcm24Base64(new Array(24).fill(500)))
+    })
+    expect(result.current.durationSeconds).toBeCloseTo(0.001, 4)
+  })
+
+  it('alternance candidat + IA : durée cumulée correcte, ordre chronologique préservé', () => {
+    const { result } = renderHook(() => useAudioRecording())
+
+    act(() => {
+      // Candidate: 16 samples at 16k → 24 samples at 24k
+      result.current.addCandidateChunk(makePcm16(new Array(16).fill(100)))
+      // AI: 48 samples at 24k
+      result.current.addAIChunk(makePcm24Base64(new Array(48).fill(200)))
+      // Candidate: 32 samples at 16k → 48 samples at 24k
+      result.current.addCandidateChunk(makePcm16(new Array(32).fill(300)))
+    })
+
+    // Total: 24 + 48 + 48 = 120 samples at 24 kHz = 5 ms
+    expect(result.current.durationSeconds).toBeCloseTo(120 / 24000, 5)
+
+    // Export the three chunks; only the Blob type and total size are asserted
+    // here (sample order inside the payload is not inspected by this test).
+    const blob = result.current.exportWAV()
+    expect(blob.type).toBe('audio/wav')
+    expect(blob.size).toBe(44 + 120 * 2) // header + 120 samples × 2 bytes
+  })
+
+  it('exportWAV : header valide RIFF/WAVE/fmt/data + sampleRate 24000 LE', async () => {
+    const { result } = renderHook(() => useAudioRecording())
+
+    act(() => {
+      result.current.addAIChunk(makePcm24Base64([1, 2, 3, 4]))
+    })
+
+    const blob = result.current.exportWAV()
+    // jsdom: Response / blob.arrayBuffer() may not materialize ArrayBuffer
+    // parts; reading through FileReader is more reliable.
+    // The explicit <ArrayBuffer> keeps `buf` typed for the DataView below.
+    const buf = await new Promise<ArrayBuffer>((resolve, reject) => {
+      const reader = new FileReader()
+      reader.onload = () => resolve(reader.result as ArrayBuffer)
+      reader.onerror = () => reject(reader.error)
+      reader.readAsArrayBuffer(blob)
+    })
+    const view = new DataView(buf)
+
+    // Magic strings
+    const readString = (off: number, len: number) => {
+      let s = ''
+      for (let i = 0; i < len; i++) s += String.fromCharCode(view.getUint8(off + i))
+      return s
+    }
+    expect(readString(0, 4)).toBe('RIFF')
+    expect(readString(8, 4)).toBe('WAVE')
+    expect(readString(12, 4)).toBe('fmt ')
+    expect(readString(36, 4)).toBe('data')
+
+    // Sample rate (offset 24, uint32 LE)
+    expect(view.getUint32(24, true)).toBe(24000)
+    // Data length (offset 40) = 4 samples × 2 bytes
+    expect(view.getUint32(40, true)).toBe(8)
+
+    // PCM data: the 4 samples
+    expect(view.getInt16(44, true)).toBe(1)
+    expect(view.getInt16(46, true)).toBe(2)
+    expect(view.getInt16(48, true)).toBe(3)
+    expect(view.getInt16(50, true)).toBe(4)
+  })
+
+  it('reset : vide le buffer et remet durationSeconds à 0', () => {
+    const { result } = renderHook(() => useAudioRecording())
+
+    act(() => {
+      result.current.addAIChunk(makePcm24Base64([1, 2, 3, 4]))
+    })
+    expect(result.current.durationSeconds).toBeGreaterThan(0)
+
+    act(() => {
+      result.current.reset()
+    })
+    expect(result.current.durationSeconds).toBe(0)
+    const blob = result.current.exportWAV()
+    expect(blob.size).toBe(44) // header only
+  })
+
+  it('exportWAV sans chunks : Blob avec uniquement le header (44 octets)', () => {
+    const { result } = renderHook(() => useAudioRecording())
+    const blob = result.current.exportWAV()
+    expect(blob.size).toBe(44)
+    expect(blob.type).toBe('audio/wav')
+  })
+
+  it('chunks vides ignorés : addCandidateChunk(empty) et addAIChunk("") n’incrémentent pas la durée', () => {
+    const { result } = renderHook(() => useAudioRecording())
+
+    act(() => {
+      result.current.addCandidateChunk(new ArrayBuffer(0))
+      result.current.addAIChunk('')
+    })
+    expect(result.current.durationSeconds).toBe(0)
+  })
+})
diff --git a/src/features/t2-live/hooks/useAudioCapture.ts b/src/features/t2-live/hooks/useAudioCapture.ts
new file mode 100644
index 0000000..72df470
--- /dev/null
+++ b/src/features/t2-live/hooks/useAudioCapture.ts
@@ -0,0 +1,146 @@
+/**
+ * useAudioCapture — Microphone capture hook for T2 Live (Sprint 6b).
+ *
+ * Wraps the pipeline:
+ *   getUserMedia → AudioContext → AudioWorklet (pcm-capture-processor.js)
+ *   → PCM 16 kHz Int16 LE chunks → base64 → onChunk()
+ *
+ * The worklet handles resampling when the native sample rate differs from 16 kHz.
+ * The hook does not touch the WebSocket — the caller (Sprint 6c) wires `onChunk`
+ * to `ws.send`.
+ *
+ * Cleanup: tracks.stop(), worklet.disconnect(), context.close() on stop() or
+ * when the component unmounts.
+ */
+
+import { useCallback, useEffect, useRef, useState } from 'react'
+import { arrayBufferToBase64 } from '@/shared/lib/audio-utils'
+
+export interface UseAudioCaptureOptions {
+  /** Callback invoked for every PCM 16 kHz chunk, encoded as base64. 
*/
+  onChunk: (base64: string) => void
+}
+
+export interface UseAudioCaptureResult {
+  start: () => Promise<void>
+  stop: () => void
+  isCapturing: boolean
+  error: string | null
+}
+
+const WORKLET_URL = '/pcm-capture-processor.js'
+const PREFERRED_SAMPLE_RATE = 16000
+
+/**
+ * Creates the AudioContext and the microphone source node for `stream`.
+ *
+ * A 16 kHz context is tried first. If the browser rejects that rate, or
+ * refuses to attach the stream to a context running at that rate, the
+ * half-built context is closed and a context at the device default rate is
+ * used instead; the worklet then resamples to 16 kHz.
+ *
+ * @throws the error of the last attempt when no context can be created.
+ */
+function openCaptureGraph(stream: MediaStream): {
+  ctx: AudioContext
+  source: MediaStreamAudioSourceNode
+} {
+  const attempts: (AudioContextOptions | undefined)[] = [
+    { sampleRate: PREFERRED_SAMPLE_RATE },
+    undefined,
+  ]
+  let lastError: unknown
+  for (const contextOptions of attempts) {
+    let ctx: AudioContext | null = null
+    try {
+      ctx = new AudioContext(contextOptions)
+      return { ctx, source: ctx.createMediaStreamSource(stream) }
+    } catch (err) {
+      lastError = err
+      if (ctx) void ctx.close().catch(() => undefined)
+    }
+  }
+  throw lastError
+}
+
+/**
+ * Captures the microphone and forwards every PCM chunk posted by the
+ * `pcm-capture-processor` worklet to `options.onChunk` as base64.
+ *
+ * @param options `onChunk` is read through a ref, so passing a new inline
+ *   callback on every render does not restart the capture.
+ * @returns `start` / `stop` controls, the `isCapturing` flag and the last
+ *   start error message (or null).
+ */
+export function useAudioCapture(options: UseAudioCaptureOptions): UseAudioCaptureResult {
+  const [isCapturing, setIsCapturing] = useState(false)
+  const [error, setError] = useState<string | null>(null)
+
+  const contextRef = useRef<AudioContext | null>(null)
+  const streamRef = useRef<MediaStream | null>(null)
+  const workletNodeRef = useRef<AudioWorkletNode | null>(null)
+  const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null)
+
+  // Incremented by every start() and every cleanup(): an in-flight start()
+  // compares its own value after each await to detect that it was cancelled.
+  const sessionRef = useRef(0)
+  // Synchronous re-entrancy guard. `isCapturing` is React state and is still
+  // false while start() is awaiting, so it cannot protect against a double call.
+  const startedRef = useRef(false)
+
+  // Keep the options in a ref so effects/callbacks are not re-created on
+  // every render (callers often pass an inline onChunk).
+  const optionsRef = useRef(options)
+  useEffect(() => {
+    optionsRef.current = options
+  })
+
+  const cleanup = useCallback(() => {
+    sessionRef.current += 1 // cancels any start() still awaiting
+    startedRef.current = false
+
+    const workletNode = workletNodeRef.current
+    workletNodeRef.current = null
+    if (workletNode) {
+      try {
+        workletNode.port.onmessage = null
+        workletNode.disconnect()
+      } catch {
+        /* ignore — best-effort teardown */
+      }
+    }
+
+    const source = sourceNodeRef.current
+    sourceNodeRef.current = null
+    if (source) {
+      try {
+        source.disconnect()
+      } catch {
+        /* ignore — best-effort teardown */
+      }
+    }
+
+    const stream = streamRef.current
+    streamRef.current = null
+    if (stream) {
+      stream.getTracks().forEach((t) => {
+        try {
+          t.stop()
+        } catch {
+          /* ignore — best-effort teardown */
+        }
+      })
+    }
+
+    const ctx = contextRef.current
+    contextRef.current = null
+    if (ctx) {
+      // close() reports failures through its promise, not synchronously.
+      void ctx.close().catch(() => undefined)
+    }
+  }, [])
+
+  const start = useCallback(async () => {
+    if (startedRef.current) return
+    startedRef.current = true
+    sessionRef.current += 1
+    const session = sessionRef.current
+    const isStale = () => session !== sessionRef.current
+    setError(null)
+
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          channelCount: 1,
+          echoCancellation: true,
+          noiseSuppression: true,
+        },
+      })
+      if (isStale()) {
+        // stop()/unmount happened while the permission prompt was open: the
+        // stream was never stored in a ref, so release it here.
+        stream.getTracks().forEach((t) => t.stop())
+        return
+      }
+      streamRef.current = stream
+
+      const { ctx, source } = openCaptureGraph(stream)
+      contextRef.current = ctx
+      sourceNodeRef.current = source
+
+      await ctx.audioWorklet.addModule(WORKLET_URL)
+      // If cancelled meanwhile, cleanup() already released everything held in
+      // the refs; do not build nodes on the closed context.
+      if (isStale()) return
+
+      const workletNode = new AudioWorkletNode(ctx, 'pcm-capture-processor')
+      workletNodeRef.current = workletNode
+
+      workletNode.port.onmessage = (e: MessageEvent<ArrayBuffer>) => {
+        try {
+          optionsRef.current.onChunk(arrayBufferToBase64(e.data))
+        } catch {
+          /* ignore — a throwing callback must not break the capture */
+        }
+      }
+
+      source.connect(workletNode)
+      // The worklet is not connected to ctx.destination: the microphone is
+      // not played back locally.
+
+      setIsCapturing(true)
+    } catch (err) {
+      // A cancelled start() must not overwrite the state of a newer session.
+      if (isStale()) return
+      const message = err instanceof Error ? err.message : 'Unknown error'
+      setError(message)
+      cleanup()
+    }
+  }, [cleanup])
+
+  const stop = useCallback(() => {
+    cleanup()
+    setIsCapturing(false)
+  }, [cleanup])
+
+  // Cleanup on unmount.
+  useEffect(() => {
+    return () => {
+      cleanup()
+    }
+  }, [cleanup])
+
+  return { start, stop, isCapturing, error }
+}
diff --git a/src/features/t2-live/hooks/useAudioPlayback.ts b/src/features/t2-live/hooks/useAudioPlayback.ts
new file mode 100644
index 0000000..3f63e8c
--- /dev/null
+++ b/src/features/t2-live/hooks/useAudioPlayback.ts
@@ -0,0 +1,132 @@
+/**
+ * useAudioPlayback — Audio playback hook for T2 Live (Sprint 6b).
+ *
+ * Receives PCM 24 kHz Int16 LE chunks encoded as base64 (Gemini Live format)
+ * and plays them sequentially without gaps via AudioContext + AudioBufferSourceNode.
+ *
+ * Strategy: each chunk is scheduled with `source.start(nextStartTime)` where
+ * `nextStartTime = max(ctx.currentTime, lastEndTime)`, which keeps playback
+ * continuous even when chunks arrive in bursts.
+ *
+ * The hook does not touch the WebSocket — the caller (Sprint 6c) calls
+ * `playChunk(base64)` for every audio message received. 
+ */
+
+import { useCallback, useEffect, useRef, useState } from 'react'
+import { base64ToArrayBuffer, int16ToFloat32 } from '@/shared/lib/audio-utils'
+
+const PLAYBACK_SAMPLE_RATE = 24000
+
+export interface UseAudioPlaybackResult {
+  playChunk: (base64: string) => void
+  stop: () => void
+  isPlaying: boolean
+}
+
+/**
+ * Schedules base64 PCM chunks back to back on a lazily created AudioContext.
+ *
+ * @returns `playChunk` to enqueue a chunk, `stop` to drop everything that is
+ *   scheduled and close the context, and `isPlaying`, which is true from the
+ *   first enqueued chunk until shortly after the last scheduled end time.
+ */
+export function useAudioPlayback(): UseAudioPlaybackResult {
+  const [isPlaying, setIsPlaying] = useState(false)
+
+  const contextRef = useRef<AudioContext | null>(null)
+  const lastEndTimeRef = useRef(0)
+  // Timer that flips `isPlaying` back to false once the queue has drained.
+  const isPlayingTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null)
+  const activeSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set())
+
+  const ensureContext = useCallback((): AudioContext => {
+    const existing = contextRef.current
+    if (existing && existing.state !== 'closed') {
+      return existing
+    }
+    let ctx: AudioContext
+    try {
+      ctx = new AudioContext({ sampleRate: PLAYBACK_SAMPLE_RATE })
+    } catch {
+      // The requested rate was rejected: fall back to the device default.
+      // Buffers are still created at PLAYBACK_SAMPLE_RATE below.
+      ctx = new AudioContext()
+    }
+    contextRef.current = ctx
+    lastEndTimeRef.current = 0
+    return ctx
+  }, [])
+
+  const cleanup = useCallback(() => {
+    if (isPlayingTimerRef.current !== null) {
+      clearTimeout(isPlayingTimerRef.current)
+      isPlayingTimerRef.current = null
+    }
+    activeSourcesRef.current.forEach((s) => {
+      try {
+        // Detach first so stop() does not call back into a torn-down hook.
+        s.onended = null
+        s.stop()
+        s.disconnect()
+      } catch {
+        /* ignore — best-effort teardown */
+      }
+    })
+    activeSourcesRef.current.clear()
+    const ctx = contextRef.current
+    contextRef.current = null
+    if (ctx) {
+      // close() reports failures through its promise, not synchronously.
+      void ctx.close().catch(() => undefined)
+    }
+    lastEndTimeRef.current = 0
+  }, [])
+
+  const playChunk = useCallback(
+    (base64: string) => {
+      try {
+        // Decode before touching the AudioContext so an empty or malformed
+        // chunk does not create one.
+        const arrayBuffer = base64ToArrayBuffer(base64)
+        // Explicit length: a trailing odd byte is ignored instead of making
+        // the Int16Array constructor throw and the whole chunk being dropped.
+        const int16 = new Int16Array(arrayBuffer, 0, Math.floor(arrayBuffer.byteLength / 2))
+        const float32 = int16ToFloat32(int16)
+
+        if (float32.length === 0) return
+
+        const ctx = ensureContext()
+        if (ctx.state === 'suspended') {
+          // A context created without a user gesture may start suspended;
+          // scheduled sources only play once it is running.
+          void ctx.resume().catch(() => undefined)
+        }
+
+        const audioBuffer = ctx.createBuffer(1, float32.length, PLAYBACK_SAMPLE_RATE)
+        audioBuffer.getChannelData(0).set(float32)
+
+        const source = ctx.createBufferSource()
+        source.buffer = audioBuffer
+        source.connect(ctx.destination)
+
+        const startTime = Math.max(ctx.currentTime, lastEndTimeRef.current)
+        source.start(startTime)
+        const duration = float32.length / PLAYBACK_SAMPLE_RATE
+        lastEndTimeRef.current = startTime + duration
+
+        activeSourcesRef.current.add(source)
+        source.onended = () => {
+          activeSourcesRef.current.delete(source)
+          try {
+            source.disconnect()
+          } catch {
+            /* ignore */
+          }
+        }
+
+        setIsPlaying(true)
+        // Re-arm the switch to false after the new scheduled end.
+        if (isPlayingTimerRef.current !== null) {
+          clearTimeout(isPlayingTimerRef.current)
+        }
+        const remainingMs = (lastEndTimeRef.current - ctx.currentTime) * 1000
+        isPlayingTimerRef.current = setTimeout(() => {
+          setIsPlaying(false)
+          isPlayingTimerRef.current = null
+        }, remainingMs + 50)
+      } catch {
+        /* ignore — a malformed chunk must not break the app */
+      }
+    },
+    [ensureContext],
+  )
+
+  const stop = useCallback(() => {
+    cleanup()
+    setIsPlaying(false)
+  }, [cleanup])
+
+  // Cleanup on unmount.
+  useEffect(() => {
+    return () => {
+      cleanup()
+    }
+  }, [cleanup])
+
+  return { playChunk, stop, isPlaying }
+}
diff --git a/src/features/t2-live/hooks/useAudioRecording.ts b/src/features/t2-live/hooks/useAudioRecording.ts
new file mode 100644
index 0000000..cbac497
--- /dev/null
+++ b/src/features/t2-live/hooks/useAudioRecording.ts
@@ -0,0 +1,100 @@
+/**
+ * useAudioRecording — Audio accumulation hook for download (Sprint 6b).
+ *
+ * Single chronological buffer of candidate chunks (PCM 16 kHz, raw ArrayBuffer
+ * coming out of the worklet) and AI chunks (PCM 24 kHz, base64 received from
+ * the Gemini WS). Candidate chunks are resampled to 24 kHz when added so the
+ * buffer has a single sample rate.
+ *
+ * At the end of the session, `exportWAV()` produces a mono 24 kHz `audio/wav`
+ * Blob concatenating all chunks in arrival order — suitable for download.
+ *
+ * The hook does not touch the WebSocket. 
The caller (Sprint 6c) calls:
+ *   - `addCandidateChunk(arrayBuffer)` for every chunk received from the worklet
+ *   - `addAIChunk(base64)` for every chunk received from the Gemini WS
+ */
+
+import { useCallback, useRef, useState } from 'react'
+import { base64ToArrayBuffer, buildWavHeader, resample16kTo24k } from '@/shared/lib/audio-utils'
+
+const RECORDING_SAMPLE_RATE = 24000
+
+export interface UseAudioRecordingResult {
+  /** Adds a candidate chunk (PCM 16 kHz Int16 LE). Resampled to 24 kHz. */
+  addCandidateChunk: (pcm16k: ArrayBuffer) => void
+  /** Adds an AI chunk (PCM 24 kHz Int16 LE encoded as base64). */
+  addAIChunk: (base64: string) => void
+  /** Builds a mono 24 kHz WAV Blob from the accumulated buffer. */
+  exportWAV: () => Blob
+  /** Total duration in seconds (updated on every add). */
+  durationSeconds: number
+  /** Empties the buffer. */
+  reset: () => void
+}
+
+/**
+ * Views `buffer` as 16-bit samples. The length is given explicitly so that a
+ * trailing odd byte is ignored instead of making the constructor throw.
+ */
+function toInt16Samples(buffer: ArrayBuffer): Int16Array {
+  return new Int16Array(buffer, 0, Math.floor(buffer.byteLength / 2))
+}
+
+/**
+ * Accumulates candidate and AI audio in arrival order, normalized to 24 kHz
+ * Int16 mono, and exports the result as a WAV Blob.
+ */
+export function useAudioRecording(): UseAudioRecordingResult {
+  const chunksRef = useRef<Int16Array[]>([])
+  const totalSamplesRef = useRef(0)
+  const [durationSeconds, setDurationSeconds] = useState(0)
+
+  // Stores a 24 kHz chunk and refreshes the duration; empty chunks are ignored.
+  const appendChunk = useCallback((chunk: Int16Array) => {
+    if (chunk.length === 0) return
+    chunksRef.current.push(chunk)
+    totalSamplesRef.current += chunk.length
+    setDurationSeconds(totalSamplesRef.current / RECORDING_SAMPLE_RATE)
+  }, [])
+
+  const addCandidateChunk = useCallback(
+    (pcm16k: ArrayBuffer) => {
+      const samples = toInt16Samples(pcm16k)
+      if (samples.length === 0) return
+      appendChunk(resample16kTo24k(samples))
+    },
+    [appendChunk],
+  )
+
+  const addAIChunk = useCallback(
+    (base64: string) => {
+      if (base64.length === 0) return
+      // base64ToArrayBuffer allocates a fresh buffer on every call, so the
+      // samples can be stored as a view on it without copying. An error
+      // thrown while decoding propagates to the caller.
+      appendChunk(toInt16Samples(base64ToArrayBuffer(base64)))
+    },
+    [appendChunk],
+  )
+
+  const exportWAV = useCallback((): Blob => {
+    // Concatenate every chunk into a single Int16Array.
+    const merged = new Int16Array(totalSamplesRef.current)
+    let offset = 0
+    for (const chunk of chunksRef.current) {
+      merged.set(chunk, offset)
+      offset += chunk.length
+    }
+    const dataLength = merged.byteLength // = total samples * 2
+    const header = buildWavHeader(dataLength, RECORDING_SAMPLE_RATE)
+    // Use Uint8Array parts: some environments (jsdom) do not handle raw
+    // ArrayBuffer parts correctly in the Blob constructor.
+    return new Blob([new Uint8Array(header), new Uint8Array(merged.buffer)], { type: 'audio/wav' })
+  }, [])
+
+  const reset = useCallback(() => {
+    chunksRef.current = []
+    totalSamplesRef.current = 0
+    setDurationSeconds(0)
+  }, [])
+
+  return {
+    addCandidateChunk,
+    addAIChunk,
+    exportWAV,
+    durationSeconds,
+    reset,
+  }
+}
diff --git a/src/shared/lib/__tests__/audio-utils.test.ts b/src/shared/lib/__tests__/audio-utils.test.ts
new file mode 100644
index 0000000..ba71f3a
--- /dev/null
+++ b/src/shared/lib/__tests__/audio-utils.test.ts
@@ -0,0 +1,113 @@
+import { describe, it, expect } from 'vitest'
+import {
+  arrayBufferToBase64,
+  base64ToArrayBuffer,
+  int16ToFloat32,
+  float32ToInt16,
+  resample16kTo24k,
+  buildWavHeader,
+} from '../audio-utils'
+
+describe('arrayBufferToBase64 / base64ToArrayBuffer', () => {
+  it('round-trip sur un buffer court (4 octets)', () => {
+    const original = new Uint8Array([0x01, 0x02, 0x03, 0xff])
+    const b64 = arrayBufferToBase64(original.buffer)
+    const decoded = new Uint8Array(base64ToArrayBuffer(b64))
+    expect(decoded).toEqual(original)
+  })
+
+  it('round-trip sur un buffer vide', () => {
+    const b64 = arrayBufferToBase64(new ArrayBuffer(0))
+    expect(b64).toBe('')
+    expect(base64ToArrayBuffer('').byteLength).toBe(0)
+  })
+
+  it('round-trip sur 8 KB 
(taille typique chunk T2 Live)', () => {
+    const bytes = new Uint8Array(8192)
+    for (let i = 0; i < 8192; i++) bytes[i] = i % 256
+    const b64 = arrayBufferToBase64(bytes.buffer)
+    const decoded = new Uint8Array(base64ToArrayBuffer(b64))
+    expect(decoded.length).toBe(8192)
+    expect(decoded[0]).toBe(0)
+    expect(decoded[255]).toBe(255)
+    expect(decoded[8191]).toBe(8191 % 256)
+  })
+})
+
+describe('int16ToFloat32 / float32ToInt16', () => {
+  it('int16ToFloat32 mappe 0 → 0, 32767 → ~1, -32768 → -1', () => {
+    const out = int16ToFloat32(new Int16Array([0, 32767, -32768]))
+    expect(out[0]).toBe(0)
+    expect(out[1]).toBeCloseTo(0.99997, 4)
+    expect(out[2]).toBe(-1)
+  })
+
+  it('float32ToInt16 clamp les valeurs hors plage', () => {
+    const out = float32ToInt16(new Float32Array([2.0, -2.0, 0]))
+    expect(out[0]).toBe(32767)
+    expect(out[1]).toBe(-32768)
+    expect(out[2]).toBe(0)
+  })
+
+  it('round-trip int16 → float32 → int16 préserve les valeurs (à 1 unité près)', () => {
+    const original = new Int16Array([-30000, -100, 0, 100, 30000])
+    const back = float32ToInt16(int16ToFloat32(original))
+    for (let i = 0; i < original.length; i++) {
+      expect(Math.abs(back[i]! - original[i]!)).toBeLessThanOrEqual(1)
+    }
+  })
+})
+
+describe('resample16kTo24k', () => {
+  it('produit ceil(input.length * 1.5) samples en sortie', () => {
+    expect(resample16kTo24k(new Int16Array(4)).length).toBe(6)
+    expect(resample16kTo24k(new Int16Array(10)).length).toBe(15)
+    expect(resample16kTo24k(new Int16Array(4096)).length).toBe(6144)
+  })
+
+  it('interpole linéairement entre samples consécutifs', () => {
+    // Input: [0, 1000] at 16 kHz → 3 samples at 24 kHz
+    //   i=0: srcIndex=0   → 0
+    //   i=1: srcIndex=2/3 → 0 + (2/3)*1000 ≈ 667
+    //   i=2: srcIndex=4/3 → upper neighbour clamped to index 1 → 1000
+    const out = resample16kTo24k(new Int16Array([0, 1000]))
+    expect(out[0]).toBe(0)
+    expect(out[1]).toBeGreaterThan(600)
+    expect(out[1]).toBeLessThan(700)
+    expect(out[2]).toBe(1000)
+  })
+
+  it('renvoie un buffer vide pour un input vide', () => {
+    expect(resample16kTo24k(new Int16Array(0)).length).toBe(0)
+  })
+})
+
+describe('buildWavHeader', () => {
+  it('renvoie 44 octets', () => {
+    const header = buildWavHeader(1000, 24000)
+    expect(header.byteLength).toBe(44)
+  })
+
+  it('contient les magic strings RIFF / WAVE / fmt / data', () => {
+    const view = new DataView(buildWavHeader(1000, 24000))
+    // Reads `len` bytes starting at `offset` as an ASCII string.
+    const readString = (offset: number, len: number) => {
+      let s = ''
+      for (let i = 0; i < len; i++) s += String.fromCharCode(view.getUint8(offset + i))
+      return s
+    }
+    expect(readString(0, 4)).toBe('RIFF')
+    expect(readString(8, 4)).toBe('WAVE')
+    expect(readString(12, 4)).toBe('fmt ')
+    expect(readString(36, 4)).toBe('data')
+  })
+
+  it('encode sampleRate et dataLength en little-endian', () => {
+    const view = new DataView(buildWavHeader(2000, 24000))
+    expect(view.getUint32(24, true)).toBe(24000) // sampleRate
+    expect(view.getUint32(40, true)).toBe(2000) // dataLength
+    expect(view.getUint32(4, true)).toBe(36 + 2000) // total chunkSize
+    expect(view.getUint32(28, true)).toBe(24000 * 2) // byteRate (mono 16-bit)
+    expect(view.getUint16(22, true)).toBe(1) // numChannels mono
+    
expect(view.getUint16(34, true)).toBe(16) // bitsPerSample
+  })
+})
diff --git a/src/shared/lib/audio-utils.ts b/src/shared/lib/audio-utils.ts
new file mode 100644
index 0000000..779f468
--- /dev/null
+++ b/src/shared/lib/audio-utils.ts
@@ -0,0 +1,139 @@
+/**
+ * Pure audio helpers — Sprint 6b T2 Live.
+ *
+ * Conversions between the formats used by Gemini Live and the Web Audio APIs:
+ *   - PCM 16-bit little-endian ↔ Float32 [-1, 1]
+ *   - Resampling 16 kHz → 24 kHz (linear interpolation)
+ *   - WAV header encoding for downloading the session
+ *
+ * All functions are pure (no state, no side effect) and rely only on
+ * `btoa` / `atob` and typed arrays.
+ */
+
+/** Bytes converted per `String.fromCharCode` call in arrayBufferToBase64. */
+const BASE64_CHUNK_BYTES = 0x8000
+
+/**
+ * Encodes an ArrayBuffer as base64.
+ *
+ * The bytes are turned into a binary string in slices of BASE64_CHUNK_BYTES:
+ * spreading a whole large buffer into a single `String.fromCharCode(...)`
+ * call can exceed the engine's argument limit, while a fixed slice size keeps
+ * the argument count bounded for any input size.
+ *
+ * @param buffer bytes to encode (may be empty).
+ * @returns the base64 string; `''` for an empty buffer.
+ */
+export function arrayBufferToBase64(buffer: ArrayBuffer): string {
+  const bytes = new Uint8Array(buffer)
+  let binary = ''
+  for (let offset = 0; offset < bytes.length; offset += BASE64_CHUNK_BYTES) {
+    binary += String.fromCharCode(...bytes.subarray(offset, offset + BASE64_CHUNK_BYTES))
+  }
+  return btoa(binary)
+}
+
+/**
+ * Decodes a base64 string into a newly allocated ArrayBuffer.
+ *
+ * @param base64 base64 text, passed as-is to `atob`.
+ * @returns a fresh buffer holding the decoded bytes.
+ */
+export function base64ToArrayBuffer(base64: string): ArrayBuffer {
+  const binary = atob(base64)
+  const bytes = new Uint8Array(binary.length)
+  for (let i = 0; i < binary.length; i++) {
+    bytes[i] = binary.charCodeAt(i)
+  }
+  return bytes.buffer
+}
+
+/**
+ * Converts Int16 PCM samples to Float32 in [-1, 1).
+ * Every sample is divided by 32768 (= 2^15), so -32768 maps to -1 and
+ * 32767 maps to ≈ 0.99997.
+ */
+export function int16ToFloat32(int16: Int16Array): Float32Array {
+  const out = new Float32Array(int16.length)
+  for (let i = 0; i < int16.length; i++) {
+    out[i] = int16[i]! / 0x8000
+  }
+  return out
+}
+
+/**
+ * Converts Float32 samples to Int16 PCM.
+ * Values are clamped to [-1, 1] first; negatives are scaled by 32768 and
+ * positives by 32767 so that both ends of the Int16 range are reachable.
+ */
+export function float32ToInt16(float32: Float32Array): Int16Array {
+  const out = new Int16Array(float32.length)
+  for (let i = 0; i < float32.length; i++) {
+    const s = Math.max(-1, Math.min(1, float32[i]!))
+    out[i] = s < 0 ? Math.round(s * 0x8000) : Math.round(s * 0x7fff)
+  }
+  return out
+}
+
+/**
+ * Resamples Int16 PCM from 16 kHz to 24 kHz by linear interpolation
+ * (ratio 1.5: 3 samples out for every 2 samples in).
+ *
+ * For output sample i the matching source position is `i / 1.5`; the value
+ * is interpolated between the two surrounding source samples. The upper
+ * neighbour is clamped to the last input sample, so the tail of the output
+ * repeats that last value instead of reading past the end.
+ *
+ * @returns a new array of `ceil(samples.length * 1.5)` samples.
+ */
+export function resample16kTo24k(samples: Int16Array): Int16Array {
+  const ratio = 24000 / 16000 // 1.5
+  const outputLength = Math.ceil(samples.length * ratio)
+  const out = new Int16Array(outputLength)
+
+  for (let i = 0; i < outputLength; i++) {
+    const srcIndex = i / ratio
+    const srcFloor = Math.floor(srcIndex)
+    const srcCeil = Math.min(srcFloor + 1, samples.length - 1)
+    const frac = srcIndex - srcFloor
+    out[i] = Math.round(samples[srcFloor]! * (1 - frac) + samples[srcCeil]! * frac)
+  }
+
+  return out
+}
+
+/**
+ * Builds a 44-byte WAV header for integer PCM (mono 16-bit by default).
+ *
+ * Standard RIFF/WAVE layout:
+ *   - bytes 0-3   : "RIFF"
+ *   - bytes 4-7   : total size - 8 (uint32 LE)
+ *   - bytes 8-11  : "WAVE"
+ *   - bytes 12-15 : "fmt "
+ *   - bytes 16-19 : size of the fmt sub-chunk = 16
+ *   - bytes 20-21 : format = 1 (PCM)
+ *   - bytes 22-23 : numChannels
+ *   - bytes 24-27 : sampleRate
+ *   - bytes 28-31 : byteRate = sampleRate * numChannels * bitsPerSample / 8
+ *   - bytes 32-33 : blockAlign = numChannels * bitsPerSample / 8
+ *   - bytes 34-35 : bitsPerSample
+ *   - bytes 36-39 : "data"
+ *   - bytes 40-43 : dataLength
+ *
+ * @param dataLength number of PCM bytes that follow the header
+ *   (= samples * 2 for mono 16-bit). Written as uint32, so it must stay
+ *   below 4 GiB - 36 for the header to be meaningful.
+ * @param sampleRate sample rate in Hz.
+ * @param numChannels channel count; defaults to 1 (mono).
+ * @param bitsPerSample sample width in bits; defaults to 16.
+ * @returns a new 44-byte ArrayBuffer containing the header only.
+ 
*/
+export function buildWavHeader(
+  dataLength: number,
+  sampleRate: number,
+  numChannels = 1,
+  bitsPerSample = 16,
+): ArrayBuffer {
+  const buffer = new ArrayBuffer(44)
+  const view = new DataView(buffer)
+  const bytesPerSample = bitsPerSample / 8
+  const byteRate = sampleRate * numChannels * bytesPerSample
+  const blockAlign = numChannels * bytesPerSample
+
+  // Writes an ASCII tag byte by byte at `offset`.
+  const writeString = (offset: number, s: string) => {
+    for (let i = 0; i < s.length; i++) {
+      view.setUint8(offset + i, s.charCodeAt(i))
+    }
+  }
+
+  writeString(0, 'RIFF')
+  view.setUint32(4, 36 + dataLength, true)
+  writeString(8, 'WAVE')
+  writeString(12, 'fmt ')
+  view.setUint32(16, 16, true)
+  view.setUint16(20, 1, true)
+  view.setUint16(22, numChannels, true)
+  view.setUint32(24, sampleRate, true)
+  view.setUint32(28, byteRate, true)
+  view.setUint16(32, blockAlign, true)
+  view.setUint16(34, bitsPerSample, true)
+  writeString(36, 'data')
+  view.setUint32(40, dataLength, true)
+
+  return buffer
+}