feat(t1-live): examinateur avec interruption probabiliste pilotée par le backend (Sprint 7a)
- Session T1 Live : monologue candidat + interruptions pilotées par le backend (VAD manuel).
- Voix examinateur native Gemini ; le backend décide du timing (tirage probabiliste 0-2, fenêtre [25s,75s]), Gemini formule la relance sur signal d'injection (anti-TD-22).
- Injection : activityEnd -> clientContent -> activityStart ; signaux WS interruption_start/end.
- Fin de session : l'activityEnd final flushe le dernier segment candidat ; la relance terminale est coupée (audio non renvoyé, texte jeté) ; seul le texte candidat est conservé pour l'évaluation.
- buildT1SystemPrompt : nouvel artefact, règle 7 du T2 NON propagée (questions autorisées).
- Route /t1/live : auth Premium réutilisée, contexte questionnaire dynamique, persistance EO_T1 (sujet_id null), évaluation via correctEO('EO_T1'), phonologie stub /4 (TD-08 gelé).
- geminiLive.ts : exports additifs + buildSetupFrame avec VAD paramétrable (T2 inchangé).
- gitignore : exclusion des artefacts jetables de test/spike.
This commit is contained in:
parent
5f7e52d88a
commit
868bd09397
7 changed files with 1404 additions and 17 deletions
|
|
@ -97,7 +97,7 @@ export interface OpenGeminiLiveSessionOptions {
|
|||
/**
|
||||
* Forme minimale d'un message Gemini Live JSON entrant.
|
||||
*/
|
||||
interface GeminiServerMessage {
|
||||
export interface GeminiServerMessage {
|
||||
setupComplete?: unknown;
|
||||
serverContent?: {
|
||||
modelTurn?: {
|
||||
|
|
@ -112,12 +112,12 @@ interface GeminiServerMessage {
|
|||
};
|
||||
}
|
||||
|
||||
interface TranscriptEntry {
|
||||
export interface TranscriptEntry {
|
||||
speaker: "candidat" | "examinateur";
|
||||
text: string;
|
||||
}
|
||||
|
||||
function reconstructTranscript(entries: TranscriptEntry[]): string {
|
||||
export function reconstructTranscript(entries: TranscriptEntry[]): string {
|
||||
return entries
|
||||
.map((e) =>
|
||||
e.speaker === "candidat"
|
||||
|
|
@ -130,7 +130,7 @@ function reconstructTranscript(entries: TranscriptEntry[]): string {
|
|||
/**
|
||||
* Détecte un signal de fin de session envoyé par le client : `{type:'end'}`.
|
||||
*/
|
||||
function isEndSignal(data: unknown): boolean {
|
||||
export function isEndSignal(data: unknown): boolean {
|
||||
let text: string;
|
||||
if (typeof data === "string") {
|
||||
text = data;
|
||||
|
|
@ -156,7 +156,7 @@ function isEndSignal(data: unknown): boolean {
|
|||
* Parse un message client `{type:'audio', data: base64}` et renvoie le base64
|
||||
* si le format est valide, sinon null.
|
||||
*/
|
||||
function parseAudioChunk(data: unknown): string | null {
|
||||
export function parseAudioChunk(data: unknown): string | null {
|
||||
let text: string;
|
||||
if (typeof data === "string") {
|
||||
text = data;
|
||||
|
|
@ -184,7 +184,7 @@ function parseAudioChunk(data: unknown): string | null {
|
|||
/**
|
||||
* Tente de parser un message Gemini en JSON. Retourne null si binaire / non-JSON.
|
||||
*/
|
||||
function tryParseGeminiJson(data: unknown): GeminiServerMessage | null {
|
||||
export function tryParseGeminiJson(data: unknown): GeminiServerMessage | null {
|
||||
let text: string;
|
||||
if (typeof data === "string") {
|
||||
text = data;
|
||||
|
|
@ -213,12 +213,32 @@ function tryParseGeminiJson(data: unknown): GeminiServerMessage | null {
|
|||
}
|
||||
|
||||
/**
|
||||
* Construit le setup frame Gemini Live : model + responseModalities AUDIO,
|
||||
* systemInstruction (prompt T2), input/outputAudioTranscription, et
|
||||
* realtimeInputConfig.automaticActivityDetection (VAD : START/END_SENSITIVITY_LOW,
|
||||
* 2 s de silence avant que l'IA réponde — cf. IMPLEMENTATION_T2_LIVE.md §3).
|
||||
* VAD automatique par défaut (T2 Live) : START/END_SENSITIVITY_LOW, 2 s de
|
||||
* silence avant que l'IA réponde — cf. IMPLEMENTATION_T2_LIVE.md §3.
|
||||
*/
|
||||
function buildSetupFrame(systemPrompt: string): string {
|
||||
export const T2_AUTOMATIC_ACTIVITY_DETECTION = {
|
||||
disabled: false,
|
||||
startOfSpeechSensitivity: "START_SENSITIVITY_LOW",
|
||||
endOfSpeechSensitivity: "END_SENSITIVITY_LOW",
|
||||
silenceDurationMs: 2000,
|
||||
} as const;
|
||||
|
||||
/**
|
||||
* Construit le setup frame Gemini Live : model + responseModalities AUDIO,
|
||||
* systemInstruction, input/outputAudioTranscription, et
|
||||
* realtimeInputConfig.automaticActivityDetection.
|
||||
*
|
||||
* `automaticActivityDetection` est paramétrable (défaut = VAD T2 inchangé).
|
||||
* T1 Live (VAD manuel) passera `{ disabled: true }` pour piloter les bornes de
|
||||
* tour côté backend (activityStart / activityEnd).
|
||||
*/
|
||||
export function buildSetupFrame(
|
||||
systemPrompt: string,
|
||||
automaticActivityDetection: Record<
|
||||
string,
|
||||
unknown
|
||||
> = T2_AUTOMATIC_ACTIVITY_DETECTION,
|
||||
): string {
|
||||
return JSON.stringify({
|
||||
setup: {
|
||||
model: `models/${GEMINI_LIVE_MODEL}`,
|
||||
|
|
@ -231,12 +251,7 @@ function buildSetupFrame(systemPrompt: string): string {
|
|||
inputAudioTranscription: {},
|
||||
outputAudioTranscription: {},
|
||||
realtimeInputConfig: {
|
||||
automaticActivityDetection: {
|
||||
disabled: false,
|
||||
startOfSpeechSensitivity: "START_SENSITIVITY_LOW",
|
||||
endOfSpeechSensitivity: "END_SENSITIVITY_LOW",
|
||||
silenceDurationMs: 2000,
|
||||
},
|
||||
automaticActivityDetection,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue