Sprint 6b — Frontend audio capture + playback hooks

feat(audio): pcm-capture-processor.js AudioWorklet (16kHz resample, Int16 LE)
feat(hooks): useAudioCapture (getUserMedia + worklet + onChunk base64)
feat(hooks): useAudioPlayback (24kHz sequential scheduling, gap-free)
feat(hooks): useAudioRecording (chronological buffer, resample 16→24k, WAV export)
feat(lib): audio-utils (base64, int16/float32, resample, WAV header)
test: 12 audio-utils + 7 useAudioRecording = 238/238 green (+19)
This commit is contained in:
Hermann_Kitio 2026-04-26 20:08:45 +03:00
parent 5a31819bca
commit 7862f7c9f3
8 changed files with 862 additions and 0 deletions

View file

@ -0,0 +1,132 @@
import { describe, it, expect } from 'vitest'
import { act, renderHook } from '@testing-library/react'
import { useAudioRecording } from '../useAudioRecording'
import { arrayBufferToBase64 } from '@/shared/lib/audio-utils'
/**
 * Test helper: packs `samples` into a raw 16-bit PCM ArrayBuffer.
 *
 * Values go through an Int16Array, so they are stored in host byte order
 * (the tests in this file read them back as little-endian) and wrap to the
 * signed 16-bit range.
 */
function makePcm16(samples: number[]): ArrayBuffer {
  const pcm = new ArrayBuffer(samples.length * Int16Array.BYTES_PER_ELEMENT)
  new Int16Array(pcm).set(samples)
  return pcm
}
/**
 * Test helper: encodes `samples` as Int16 PCM and returns the base64 string
 * produced by `arrayBufferToBase64` — the shape `addAIChunk` takes in these tests
 * (the samples are treated as 24 kHz audio by the hook).
 */
function makePcm24Base64(samples: number[]): string {
  const pcm = new Int16Array(samples)
  const encoded = arrayBufferToBase64(pcm.buffer)
  return encoded
}
// Test suite for the useAudioRecording hook: duration bookkeeping, WAV export
// layout, reset, and handling of empty chunks.
describe('useAudioRecording', () => {
it('addCandidateChunk : rééchantillonne 16 → 24 kHz et met à jour durationSeconds', () => {
const { result } = renderHook(() => useAudioRecording())
// 16 samples at 16 kHz = 1 ms → after resampling: 24 samples at 24 kHz = 1 ms
act(() => {
result.current.addCandidateChunk(makePcm16(new Array(16).fill(1000)))
})
// 24 samples / 24000 = 0.001 s
expect(result.current.durationSeconds).toBeCloseTo(0.001, 4)
})
it('addAIChunk : ajoute le chunk tel quel et met à jour durationSeconds', () => {
const { result } = renderHook(() => useAudioRecording())
// 24 samples at 24 kHz = 1 ms (already at the target sample rate)
act(() => {
result.current.addAIChunk(makePcm24Base64(new Array(24).fill(500)))
})
expect(result.current.durationSeconds).toBeCloseTo(0.001, 4)
})
it('alternance candidat + IA : durée cumulée correcte, ordre chronologique préservé', () => {
const { result } = renderHook(() => useAudioRecording())
act(() => {
// Candidate: 16 samples at 16k → 24 samples at 24k
result.current.addCandidateChunk(makePcm16(new Array(16).fill(100)))
// AI: 48 samples at 24k
result.current.addAIChunk(makePcm24Base64(new Array(48).fill(200)))
// Candidate: 32 samples at 16k → 48 samples at 24k
result.current.addCandidateChunk(makePcm16(new Array(32).fill(300)))
})
// Total: 24 + 48 + 48 = 120 samples at 24 kHz = 5 ms
expect(result.current.durationSeconds).toBeCloseTo(120 / 24000, 5)
// Export check. NOTE(review): only the MIME type and total size are asserted
// here; the order of the samples inside the blob is not inspected.
const blob = result.current.exportWAV()
expect(blob.type).toBe('audio/wav')
expect(blob.size).toBe(44 + 120 * 2) // header + 120 samples × 2 bytes
})
it('exportWAV : header valide RIFF/WAVE/fmt/data + sampleRate 24000 LE', async () => {
const { result } = renderHook(() => useAudioRecording())
act(() => {
result.current.addAIChunk(makePcm24Base64([1, 2, 3, 4]))
})
const blob = result.current.exportWAV()
// jsdom: Response/blob.arrayBuffer() may fail to materialise ArrayBuffer
// parts; reading through FileReader is more reliable.
const buf = await new Promise<ArrayBuffer>((resolve, reject) => {
const reader = new FileReader()
reader.onload = () => resolve(reader.result as ArrayBuffer)
reader.onerror = () => reject(reader.error)
reader.readAsArrayBuffer(blob)
})
const view = new DataView(buf)
// Magic strings: read `len` bytes from offset `off` as one character per byte.
const readString = (off: number, len: number) => {
let s = ''
for (let i = 0; i < len; i++) s += String.fromCharCode(view.getUint8(off + i))
return s
}
expect(readString(0, 4)).toBe('RIFF')
expect(readString(8, 4)).toBe('WAVE')
expect(readString(12, 4)).toBe('fmt ')
expect(readString(36, 4)).toBe('data')
// Sample rate (offset 24, uint32 LE)
expect(view.getUint32(24, true)).toBe(24000)
// Data length (offset 40) = 4 samples × 2 bytes
expect(view.getUint32(40, true)).toBe(8)
// PCM data: the 4 samples, starting right after the 44-byte header
expect(view.getInt16(44, true)).toBe(1)
expect(view.getInt16(46, true)).toBe(2)
expect(view.getInt16(48, true)).toBe(3)
expect(view.getInt16(50, true)).toBe(4)
})
it('reset : vide le buffer et remet durationSeconds à 0', () => {
const { result } = renderHook(() => useAudioRecording())
act(() => {
result.current.addAIChunk(makePcm24Base64([1, 2, 3, 4]))
})
expect(result.current.durationSeconds).toBeGreaterThan(0)
act(() => {
result.current.reset()
})
expect(result.current.durationSeconds).toBe(0)
const blob = result.current.exportWAV()
expect(blob.size).toBe(44) // header only
})
it('exportWAV sans chunks : Blob avec uniquement le header (44 octets)', () => {
const { result } = renderHook(() => useAudioRecording())
const blob = result.current.exportWAV()
expect(blob.size).toBe(44)
expect(blob.type).toBe('audio/wav')
})
// NOTE(review): the title below reads "nincrémentent" — probably a lost
// apostrophe ("n'incrémentent"); confirm against the original file.
it('chunks vides ignorés : addCandidateChunk(empty) et addAIChunk("") nincrémentent pas la durée', () => {
const { result } = renderHook(() => useAudioRecording())
act(() => {
result.current.addCandidateChunk(new ArrayBuffer(0))
result.current.addAIChunk('')
})
expect(result.current.durationSeconds).toBe(0)
})
})

View file

@ -0,0 +1,146 @@
/**
 * useAudioCapture — Microphone capture hook for T2 Live (Sprint 6b).
 *
 * Wraps the pipeline:
 *   getUserMedia → AudioContext → AudioWorklet (pcm-capture-processor.js)
 *   → PCM 16 kHz Int16 LE chunks → base64 → onChunk()
 *
 * The worklet handles resampling when the native sample rate differs from 16 kHz.
 * The hook does not touch the WebSocket — the caller (Sprint 6c) wires `onChunk`
 * to `ws.send`.
 *
 * Guaranteed cleanup: tracks.stop(), worklet.disconnect(), context.close() on
 * stop() or when the component unmounts.
 */
import { useCallback, useEffect, useRef, useState } from 'react'
import { arrayBufferToBase64 } from '@/shared/lib/audio-utils'
/** Options accepted by `useAudioCapture`. */
export interface UseAudioCaptureOptions {
/**
 * Callback invoked for every chunk posted by the capture worklet, with the
 * chunk encoded as base64 (PCM 16 kHz per the file header).
 */
onChunk: (base64: string) => void
}
/** Value returned by `useAudioCapture`. */
export interface UseAudioCaptureResult {
/** Requests the microphone and starts the capture pipeline; resolves once setup has succeeded or failed. */
start: () => Promise<void>
/** Tears down the pipeline (nodes, tracks, audio context) and sets `isCapturing` to false. */
stop: () => void
/** True once `start()` has completed successfully, until `stop()` is called. */
isCapturing: boolean
/** Message of the error raised by the last `start()` attempt, or null; reset to null when `start()` begins. */
error: string | null
}
// URL handed to `audioWorklet.addModule()`. Root-relative, so the worklet script
// is presumably served from the site's static/public directory — confirm with the build setup.
const WORKLET_URL = '/pcm-capture-processor.js'
/** Stops every track of `stream`; a failing track does not prevent the others from stopping. */
function stopCaptureTracks(stream: MediaStream): void {
  for (const track of stream.getTracks()) {
    try {
      track.stop()
    } catch {
      /* best-effort teardown */
    }
  }
}

/**
 * Closes `ctx` unless it is already closed. `close()` returns a promise, so a
 * surrounding try/catch alone would miss an asynchronous rejection; both the
 * synchronous and asynchronous failure paths are swallowed here.
 */
function closeCaptureContext(ctx: AudioContext): void {
  try {
    if (ctx.state !== 'closed') {
      void ctx.close().catch(() => {
        /* best-effort teardown */
      })
    }
  } catch {
    /* best-effort teardown */
  }
}

/**
 * React hook that captures microphone audio and forwards it as base64 chunks.
 *
 * Pipeline built by `start()`:
 *   getUserMedia → AudioContext (16 kHz requested) → MediaStreamAudioSourceNode
 *   → AudioWorkletNode('pcm-capture-processor') → `options.onChunk(base64)`.
 *
 * Lifecycle guarantees:
 * - `start()` is re-entrancy safe: a second call while a start is pending or
 *   capture is running is a no-op.
 * - If `stop()` is called or the component unmounts while `start()` is still
 *   awaiting the microphone or the worklet module, the pending start releases
 *   whatever it acquired and gives up without touching state.
 * - `stop()` and unmount release nodes, tracks and the audio context.
 *
 * @param options - `onChunk` is read through a ref, so an inline callback can
 *   change on every render without restarting capture.
 * @returns `start`, `stop`, the `isCapturing` flag and the last start `error`.
 */
export function useAudioCapture(options: UseAudioCaptureOptions): UseAudioCaptureResult {
  const [isCapturing, setIsCapturing] = useState(false)
  const [error, setError] = useState<string | null>(null)

  const contextRef = useRef<AudioContext | null>(null)
  const streamRef = useRef<MediaStream | null>(null)
  const workletNodeRef = useRef<AudioWorkletNode | null>(null)
  const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null)

  // Synchronous "a session is pending or running" flag. The `isCapturing` state
  // only flips after a render, so it cannot guard against two quick start() calls.
  const activeRef = useRef(false)
  // Bumped by every cleanup(); an in-flight start() compares it with the value it
  // captured to find out whether it was cancelled in the meantime.
  const sessionRef = useRef(0)

  // Keep the latest options in a ref so the worklet handler always calls the
  // current onChunk without re-subscribing anything.
  const optionsRef = useRef(options)
  useEffect(() => {
    optionsRef.current = options
  })

  const cleanup = useCallback(() => {
    sessionRef.current += 1
    activeRef.current = false

    const workletNode = workletNodeRef.current
    workletNodeRef.current = null
    if (workletNode) {
      try {
        workletNode.port.onmessage = null
        workletNode.disconnect()
      } catch {
        /* best-effort teardown */
      }
    }

    const source = sourceNodeRef.current
    sourceNodeRef.current = null
    if (source) {
      try {
        source.disconnect()
      } catch {
        /* best-effort teardown */
      }
    }

    const stream = streamRef.current
    streamRef.current = null
    if (stream) stopCaptureTracks(stream)

    const ctx = contextRef.current
    contextRef.current = null
    if (ctx) closeCaptureContext(ctx)
  }, [])

  const start = useCallback(async () => {
    if (activeRef.current) return
    activeRef.current = true
    const session = sessionRef.current
    const superseded = (): boolean => sessionRef.current !== session
    setError(null)

    let stream: MediaStream | null = null
    let ctx: AudioContext | null = null
    try {
      stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
        },
      })
      if (superseded()) {
        // stop()/unmount happened while the permission prompt was open: the
        // stream was never stored in a ref, so it has to be released here.
        stopCaptureTracks(stream)
        return
      }
      streamRef.current = stream

      // Ask for a 16 kHz context; per the file header the worklet is expected to
      // resample when the context ends up running at another rate.
      ctx = new AudioContext({ sampleRate: 16000 })
      contextRef.current = ctx
      await ctx.audioWorklet.addModule(WORKLET_URL)
      if (superseded()) {
        // Both helpers are idempotent, so releasing again after cleanup() is safe.
        stopCaptureTracks(stream)
        closeCaptureContext(ctx)
        return
      }

      const source = ctx.createMediaStreamSource(stream)
      sourceNodeRef.current = source
      const workletNode = new AudioWorkletNode(ctx, 'pcm-capture-processor')
      workletNodeRef.current = workletNode
      workletNode.port.onmessage = (e: MessageEvent<ArrayBuffer>) => {
        try {
          optionsRef.current.onChunk(arrayBufferToBase64(e.data))
        } catch {
          /* deliberate: a throwing callback must not break the capture pipeline */
        }
      }
      source.connect(workletNode)
      // The worklet is not connected to ctx.destination: the microphone is not
      // played back locally.
      setIsCapturing(true)
    } catch (err) {
      if (superseded()) {
        // The failure belongs to a cancelled session (e.g. its context was closed
        // underneath it); release local resources and stay silent.
        if (stream) stopCaptureTracks(stream)
        if (ctx) closeCaptureContext(ctx)
        return
      }
      const message = err instanceof Error ? err.message : 'Unknown error'
      setError(message)
      cleanup()
    }
  }, [cleanup])

  const stop = useCallback(() => {
    cleanup()
    setIsCapturing(false)
  }, [cleanup])

  // Release everything when the component unmounts.
  useEffect(() => {
    return () => {
      cleanup()
    }
  }, [cleanup])

  return { start, stop, isCapturing, error }
}

View file

@ -0,0 +1,132 @@
/**
 * useAudioPlayback — Audio playback hook for T2 Live (Sprint 6b).
 *
 * Receives PCM 24 kHz Int16 LE chunks encoded as base64 (Gemini Live format)
 * and plays them sequentially without gaps via AudioContext + AudioBufferSourceNode.
 *
 * Strategy: each chunk is scheduled with `source.start(nextStartTime)`, where
 * `nextStartTime = max(ctx.currentTime, lastEndTime)`. This guarantees
 * continuous playback even when chunks arrive in bursts.
 *
 * The hook does not touch the WebSocket — the caller (Sprint 6c) calls
 * `playChunk(base64)` for every audio message received.
 */
import { useCallback, useEffect, useRef, useState } from 'react'
import { base64ToArrayBuffer, int16ToFloat32 } from '@/shared/lib/audio-utils'
// Sample rate (Hz) used both for the playback AudioContext and for every
// AudioBuffer created from an incoming chunk.
const PLAYBACK_SAMPLE_RATE = 24000
/** Value returned by `useAudioPlayback`. */
export interface UseAudioPlaybackResult {
/** Decodes a base64 Int16 PCM chunk and schedules it right after the previously scheduled audio. */
playChunk: (base64: string) => void
/** Stops all scheduled sources, closes the audio context and sets `isPlaying` to false. */
stop: () => void
/** True from the moment a chunk is scheduled until shortly after the last scheduled chunk should have ended. */
isPlaying: boolean
}
/**
 * Closes `ctx` unless it is already closed. `close()` returns a promise, so both
 * a synchronous throw and an asynchronous rejection are swallowed here.
 */
function closePlaybackContext(ctx: AudioContext): void {
  try {
    if (ctx.state !== 'closed') {
      void ctx.close().catch(() => {
        /* best-effort teardown */
      })
    }
  } catch {
    /* best-effort teardown */
  }
}

/**
 * React hook that plays a stream of base64 Int16 PCM chunks back to back.
 *
 * Each chunk becomes an AudioBufferSourceNode started at
 * `max(ctx.currentTime, end of the previously scheduled chunk)`, so consecutive
 * chunks are contiguous on the audio clock even when they arrive in bursts.
 *
 * The AudioContext is created lazily on the first chunk. Because that usually
 * happens outside a user gesture, the context may start in the `suspended`
 * state; `playChunk` asks it to resume whenever that is the case.
 *
 * @returns `playChunk`, `stop` and the `isPlaying` flag.
 */
export function useAudioPlayback(): UseAudioPlaybackResult {
  const [isPlaying, setIsPlaying] = useState(false)

  const contextRef = useRef<AudioContext | null>(null)
  // Audio-clock time at which the last scheduled chunk ends.
  const lastEndTimeRef = useRef<number>(0)
  // Timer that flips `isPlaying` back to false once the queue has drained.
  const isPlayingTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null)
  // Sources that were started and have not reported `ended` yet.
  const activeSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set())

  /** Returns the live context, creating a new one (and resetting the schedule) when needed. */
  const ensureContext = useCallback((): AudioContext => {
    const existing = contextRef.current
    if (existing && existing.state !== 'closed') {
      return existing
    }
    const ctx = new AudioContext({ sampleRate: PLAYBACK_SAMPLE_RATE })
    contextRef.current = ctx
    lastEndTimeRef.current = 0
    return ctx
  }, [])

  const cleanup = useCallback(() => {
    if (isPlayingTimerRef.current !== null) {
      clearTimeout(isPlayingTimerRef.current)
      isPlayingTimerRef.current = null
    }

    activeSourcesRef.current.forEach((source) => {
      // Separate guards: a throwing stop() must not prevent disconnect().
      try {
        source.stop()
      } catch {
        /* best-effort teardown */
      }
      try {
        source.disconnect()
      } catch {
        /* best-effort teardown */
      }
    })
    activeSourcesRef.current.clear()

    const ctx = contextRef.current
    contextRef.current = null
    if (ctx) closePlaybackContext(ctx)

    lastEndTimeRef.current = 0
  }, [])

  const playChunk = useCallback(
    (base64: string) => {
      try {
        const ctx = ensureContext()
        if (ctx.state === 'suspended') {
          // Autoplay policies can leave a freshly created context suspended; its
          // clock does not advance until it is resumed.
          // NOTE(review): if resume is refused, `isPlaying` still toggles on wall-clock time.
          void ctx.resume().catch(() => {
            /* resume may be refused without a user gesture */
          })
        }

        const int16 = new Int16Array(base64ToArrayBuffer(base64))
        const float32 = int16ToFloat32(int16)
        if (float32.length === 0) return

        const audioBuffer = ctx.createBuffer(1, float32.length, PLAYBACK_SAMPLE_RATE)
        audioBuffer.getChannelData(0).set(float32)

        const source = ctx.createBufferSource()
        source.buffer = audioBuffer
        source.connect(ctx.destination)
        source.onended = () => {
          activeSourcesRef.current.delete(source)
          try {
            source.disconnect()
          } catch {
            /* best-effort teardown */
          }
        }

        // Never schedule in the past, never overlap the previous chunk.
        const startTime = Math.max(ctx.currentTime, lastEndTimeRef.current)
        source.start(startTime)
        lastEndTimeRef.current = startTime + float32.length / PLAYBACK_SAMPLE_RATE
        activeSourcesRef.current.add(source)

        setIsPlaying(true)
        // Re-arm the "queue drained" timer for the new scheduled end (+50 ms margin).
        if (isPlayingTimerRef.current !== null) {
          clearTimeout(isPlayingTimerRef.current)
        }
        const remainingMs = (lastEndTimeRef.current - ctx.currentTime) * 1000
        isPlayingTimerRef.current = setTimeout(() => {
          setIsPlaying(false)
          isPlayingTimerRef.current = null
        }, remainingMs + 50)
      } catch {
        /* deliberate: a malformed chunk must not break the app */
      }
    },
    [ensureContext],
  )

  const stop = useCallback(() => {
    cleanup()
    setIsPlaying(false)
  }, [cleanup])

  // Release everything when the component unmounts.
  useEffect(() => {
    return () => {
      cleanup()
    }
  }, [cleanup])

  return { playChunk, stop, isPlaying }
}

View file

@ -0,0 +1,100 @@
/**
 * useAudioRecording — Audio accumulation hook for download (Sprint 6b).
 *
 * Single chronological buffer holding candidate chunks (PCM 16 kHz, raw ArrayBuffer
 * coming out of the worklet) and AI chunks (PCM 24 kHz, base64 received from the
 * Gemini WS). Candidate chunks are resampled to 24 kHz when added so that the
 * buffer is homogeneous.
 *
 * At the end of the session, `exportWAV()` produces a mono 24 kHz `audio/wav` Blob
 * concatenating all chunks in arrival order — suitable for download.
 *
 * The hook does not touch the WebSocket. The caller (Sprint 6c) calls:
 *   - `addCandidateChunk(arrayBuffer)` for each chunk received from the worklet
 *   - `addAIChunk(base64)` for each chunk received from the Gemini WS
 */
import { useCallback, useRef, useState } from 'react'
import { base64ToArrayBuffer, buildWavHeader, resample16kTo24k } from '@/shared/lib/audio-utils'
// Sample rate (Hz) of the accumulated buffer: used to convert sample counts to
// seconds and written into the exported WAV header.
const RECORDING_SAMPLE_RATE = 24000
/** Value returned by `useAudioRecording`. */
export interface UseAudioRecordingResult {
/** Adds a candidate chunk (PCM 16 kHz Int16 LE). It is resampled to 24 kHz before being stored. */
addCandidateChunk: (pcm16k: ArrayBuffer) => void
/** Adds an AI chunk (PCM 24 kHz Int16 LE encoded as base64). */
addAIChunk: (base64: string) => void
/** Builds a mono 24 kHz WAV Blob from the accumulated buffer. */
exportWAV: () => Blob
/** Total duration in seconds (updated on every added chunk). */
durationSeconds: number
/** Empties the buffer and resets the duration to 0. */
reset: () => void
}
/**
 * Views `buffer` as 16-bit samples, covering only whole samples.
 *
 * `new Int16Array(buffer)` throws a RangeError when the byte length is odd;
 * passing an explicit element count leaves a trailing stray byte out instead.
 */
function toInt16Samples(buffer: ArrayBuffer): Int16Array {
  return new Int16Array(buffer, 0, Math.floor(buffer.byteLength / 2))
}

/**
 * React hook that accumulates candidate and AI audio into one 24 kHz buffer
 * and exports it as a WAV file.
 *
 * Chunks are stored in the order the `add*` functions are called. Candidate
 * chunks (16 kHz) are resampled to 24 kHz on insertion; AI chunks (24 kHz,
 * base64) are stored as decoded. Empty or undecodable chunks are ignored, so a
 * bad message never throws into the caller's handler.
 *
 * @returns the two `add*` functions, `exportWAV`, `reset`, and `durationSeconds`
 *   (state, so the component re-renders whenever a chunk is added).
 */
export function useAudioRecording(): UseAudioRecordingResult {
  const chunksRef = useRef<Int16Array[]>([])
  // Running total of stored samples (24 kHz); kept in step with chunksRef.
  const totalSamplesRef = useRef<number>(0)
  const [durationSeconds, setDurationSeconds] = useState<number>(0)

  const updateDuration = useCallback((addedSamples: number) => {
    totalSamplesRef.current += addedSamples
    setDurationSeconds(totalSamplesRef.current / RECORDING_SAMPLE_RATE)
  }, [])

  const addCandidateChunk = useCallback(
    (pcm16k: ArrayBuffer) => {
      const samples = toInt16Samples(pcm16k)
      if (samples.length === 0) return
      const resampled = resample16kTo24k(samples)
      chunksRef.current.push(resampled)
      updateDuration(resampled.length)
    },
    [updateDuration],
  )

  const addAIChunk = useCallback(
    (base64: string) => {
      if (base64.length === 0) return
      let decoded: ArrayBuffer
      try {
        decoded = base64ToArrayBuffer(base64)
      } catch {
        // Malformed base64: drop the chunk instead of throwing into the caller.
        return
      }
      const samples = toInt16Samples(decoded)
      if (samples.length === 0) return
      // Store a private copy so the recorded chunk owns its memory, whatever the
      // decoder does with the buffer it returned.
      const copy = samples.slice()
      chunksRef.current.push(copy)
      updateDuration(copy.length)
    },
    [updateDuration],
  )

  const exportWAV = useCallback((): Blob => {
    // Concatenate every chunk into a single Int16Array, in insertion order.
    const merged = new Int16Array(totalSamplesRef.current)
    let offset = 0
    for (const chunk of chunksRef.current) {
      merged.set(chunk, offset)
      offset += chunk.length
    }
    const dataLength = merged.byteLength // 2 bytes per sample
    const header = buildWavHeader(dataLength, RECORDING_SAMPLE_RATE)
    // Wrap the parts in Uint8Array: some environments (jsdom) do not handle raw
    // ArrayBuffer parts correctly in the Blob constructor.
    return new Blob([new Uint8Array(header), new Uint8Array(merged.buffer)], { type: 'audio/wav' })
  }, [])

  const reset = useCallback(() => {
    chunksRef.current = []
    totalSamplesRef.current = 0
    setDurationSeconds(0)
  }, [])

  return {
    addCandidateChunk,
    addAIChunk,
    exportWAV,
    durationSeconds,
    reset,
  }
}