feat(t1-live): examinateur avec interruption probabiliste pilotee backend (Sprint 7a)
- Session T1 Live : monologue candidat + interruptions pilotees backend (VAD manuel).
- Voix examinateur native Gemini ; le backend decide le timing (tirage probabiliste 0-2, fenetre [25s,75s]), Gemini formule la relance sur signal d'injection (anti-TD-22).
- Injection : activityEnd -> clientContent -> activityStart ; signaux WS interruption_start/end.
- Fin de session : activityEnd final flushe le dernier segment candidat ; relance terminale coupee (audio non renvoye, texte jete) ; seul le texte candidat conserve pour l'evaluation.
- buildT1SystemPrompt : nouvel artefact, regle 7 du T2 NON propagee (questions autorisees).
- Route /t1/live : auth Premium reutilisee, contexte questionnaire dynamique, persistance EO_T1 (sujet_id null), evaluation via correctEO('EO_T1'), phonologie stub /4 (TD-08 gele).
- geminiLive.ts : exports additifs + buildSetupFrame parametrable VAD (T2 inchange).
- gitignore : exclusion des artefacts jetables de test/spike.
This commit is contained in:
parent
5f7e52d88a
commit
868bd09397
7 changed files with 1404 additions and 17 deletions
7
.gitignore
vendored
7
.gitignore
vendored
|
|
@ -3,3 +3,10 @@ dist
|
|||
.env
|
||||
.env.local
|
||||
.claude/
|
||||
|
||||
# Artefacts jetables de test/spike T1 Live (non versionnés)
|
||||
scripts/t1-spike.mjs
|
||||
scripts/check-sujet-nullable.mjs
|
||||
scripts/t1-route-test.mjs
|
||||
*.pcm
|
||||
candidat-*.wav
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import presentationsRoutes from "./routes/presentations.js";
|
|||
import transcriptionsRoutes from "./routes/transcriptions.js";
|
||||
import stripeRoutes from "./routes/stripe.js";
|
||||
import createT2LiveRoutes from "./routes/t2live.js";
|
||||
import createT1LiveRoutes from "./routes/t1live.js";
|
||||
import usersRoutes from "./routes/users.js";
|
||||
import { supabase } from "./lib/supabase.js";
|
||||
|
||||
|
|
@ -64,6 +65,7 @@ app.route("/presentations", presentationsRoutes);
|
|||
app.route("/transcriptions", transcriptionsRoutes);
|
||||
app.route("/stripe", stripeRoutes);
|
||||
app.route("/t2", createT2LiveRoutes(upgradeWebSocket));
|
||||
app.route("/t1", createT1LiveRoutes(upgradeWebSocket));
|
||||
app.route("/users", usersRoutes);
|
||||
|
||||
const port = Number(process.env.PORT) || 3000;
|
||||
|
|
|
|||
316
src/lib/__tests__/geminiLiveT1.test.ts
Normal file
316
src/lib/__tests__/geminiLiveT1.test.ts
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
||||
import { EventEmitter } from "node:events";
|
||||
import {
|
||||
buildT1SystemPrompt,
|
||||
openGeminiLiveT1Session,
|
||||
drawT1InterruptionCount,
|
||||
planT1InterruptionInstants,
|
||||
T1_INTERRUPTION_WINDOW_START_MS,
|
||||
T1_INTERRUPTION_WINDOW_END_MS,
|
||||
T1_INTERRUPTION_MIN_SPACING_MS,
|
||||
} from "../geminiLiveT1";
|
||||
import type { WebSocketLike } from "../geminiLive";
|
||||
import type { PresentationReponses } from "../../controllers/presentationController";
|
||||
|
||||
class FakeWs extends EventEmitter implements WebSocketLike {
|
||||
public sent: unknown[] = [];
|
||||
public closed = false;
|
||||
public closeCode?: number;
|
||||
public closeReason?: string;
|
||||
|
||||
send(data: unknown): void {
|
||||
this.sent.push(data);
|
||||
}
|
||||
|
||||
close(code?: number, reason?: string): void {
|
||||
if (this.closed) return;
|
||||
this.closed = true;
|
||||
this.closeCode = code;
|
||||
this.closeReason = reason;
|
||||
}
|
||||
}
|
||||
|
||||
/** random() déterministe : renvoie chaque valeur de la liste à tour de rôle. */
|
||||
function seqRandom(values: number[]): () => number {
|
||||
let i = 0;
|
||||
return () => values[i++ % values.length];
|
||||
}
|
||||
|
||||
/** Helper : signaux {type:'...'} reçus côté client (hors forwards verbatim). */
|
||||
function clientSignals(client: FakeWs): { type: string }[] {
|
||||
return client.sent
|
||||
.filter((f): f is string => typeof f === "string")
|
||||
.map((f) => {
|
||||
try {
|
||||
return JSON.parse(f) as { type?: string };
|
||||
} catch {
|
||||
return {};
|
||||
}
|
||||
})
|
||||
.filter((o): o is { type: string } => typeof o.type === "string");
|
||||
}
|
||||
|
||||
const REPONSES: PresentationReponses = {
|
||||
prenom_age_ville: "Hermann, 35 ans, Lyon",
|
||||
formation_metier: "ingénieur en informatique",
|
||||
situation_familiale: "marié, deux enfants",
|
||||
loisirs: "la randonnée et la photographie",
|
||||
motivation_canada: "de meilleures opportunités professionnelles",
|
||||
};
|
||||
|
||||
describe("buildT1SystemPrompt", () => {
|
||||
it("définit un examinateur qui relance le candidat par une question", () => {
|
||||
const prompt = buildT1SystemPrompt({ reponses: REPONSES });
|
||||
expect(prompt).toContain("examinateur");
|
||||
expect(prompt.toLowerCase()).toContain("relanc");
|
||||
expect(prompt.toLowerCase()).toContain("question");
|
||||
});
|
||||
|
||||
it("intègre les réponses du questionnaire candidat", () => {
|
||||
const prompt = buildT1SystemPrompt({ reponses: REPONSES });
|
||||
expect(prompt).toContain("Hermann, 35 ans, Lyon");
|
||||
expect(prompt).toContain("ingénieur en informatique");
|
||||
expect(prompt).toContain("marié, deux enfants");
|
||||
expect(prompt).toContain("la randonnée et la photographie");
|
||||
expect(prompt).toContain("de meilleures opportunités professionnelles");
|
||||
});
|
||||
|
||||
it("AUTORISE les questions — ne propage PAS la règle 7 du T2", () => {
|
||||
const prompt = buildT1SystemPrompt({ reponses: REPONSES });
|
||||
const upper = prompt.toUpperCase();
|
||||
// La règle 7 T2 interdit les questions et bannit le point d'interrogation.
|
||||
expect(upper).not.toContain("INTERDICTION DE POSER DES QUESTIONS");
|
||||
expect(prompt).not.toContain(
|
||||
"pas le droit d'utiliser de point d'interrogation",
|
||||
);
|
||||
// Au contraire, l'examinateur T1 DOIT poser des questions.
|
||||
expect(prompt).toContain("DOIS poser des questions");
|
||||
});
|
||||
});
|
||||
|
||||
describe("drawT1InterruptionCount (tirage déterministe)", () => {
|
||||
it("suit la distribution P0/P1/P2 selon le random injecté", () => {
|
||||
expect(drawT1InterruptionCount(() => 0.1)).toBe(0);
|
||||
expect(drawT1InterruptionCount(() => 0.5)).toBe(1);
|
||||
expect(drawT1InterruptionCount(() => 0.9)).toBe(2);
|
||||
});
|
||||
|
||||
it("gère correctement les bornes de la distribution", () => {
|
||||
expect(drawT1InterruptionCount(() => 0.0)).toBe(0); // < 0.2
|
||||
expect(drawT1InterruptionCount(() => 0.2)).toBe(1); // ≥ 0.2, < 0.8
|
||||
expect(drawT1InterruptionCount(() => 0.8)).toBe(2); // ≥ 0.8
|
||||
});
|
||||
});
|
||||
|
||||
describe("planT1InterruptionInstants", () => {
|
||||
it("ne planifie rien quand count = 0", () => {
|
||||
expect(planT1InterruptionInstants(0, () => 0.5)).toEqual([]);
|
||||
});
|
||||
|
||||
it("place 1 interruption dans la fenêtre [START, END]", () => {
|
||||
expect(planT1InterruptionInstants(1, () => 0)).toEqual([
|
||||
T1_INTERRUPTION_WINDOW_START_MS,
|
||||
]);
|
||||
const [instant] = planT1InterruptionInstants(1, () => 0.5);
|
||||
expect(instant).toBeGreaterThanOrEqual(T1_INTERRUPTION_WINDOW_START_MS);
|
||||
expect(instant).toBeLessThanOrEqual(T1_INTERRUPTION_WINDOW_END_MS);
|
||||
});
|
||||
|
||||
it("place 2 interruptions espacées d'au moins MIN_SPACING, dans la fenêtre", () => {
|
||||
for (const r of [0, 0.3, 0.7, 1]) {
|
||||
const [a, b] = planT1InterruptionInstants(2, () => r);
|
||||
expect(a).toBeGreaterThanOrEqual(T1_INTERRUPTION_WINDOW_START_MS);
|
||||
expect(b).toBeLessThanOrEqual(T1_INTERRUPTION_WINDOW_END_MS);
|
||||
expect(b - a).toBeGreaterThanOrEqual(T1_INTERRUPTION_MIN_SPACING_MS);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
const SETUP_COMPLETE = JSON.stringify({ setupComplete: {} });
|
||||
|
||||
describe("openGeminiLiveT1Session (raw WS, VAD manuel)", () => {
|
||||
let originalKey: string | undefined;
|
||||
|
||||
beforeEach(() => {
|
||||
originalKey = process.env.GEMINI_API_KEY;
|
||||
process.env.GEMINI_API_KEY = "test-key";
|
||||
vi.useFakeTimers();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (originalKey === undefined) {
|
||||
delete process.env.GEMINI_API_KEY;
|
||||
} else {
|
||||
process.env.GEMINI_API_KEY = originalKey;
|
||||
}
|
||||
vi.useRealTimers();
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("envoie le setup frame en VAD manuel (disabled:true)", () => {
|
||||
const client = new FakeWs();
|
||||
const gemini = new FakeWs();
|
||||
openGeminiLiveT1Session(client, {
|
||||
reponses: REPONSES,
|
||||
clientFactory: () => gemini,
|
||||
random: seqRandom([0.1]),
|
||||
});
|
||||
gemini.emit("open");
|
||||
|
||||
const setup = JSON.parse(gemini.sent[0] as string);
|
||||
expect(setup.setup.realtimeInputConfig.automaticActivityDetection).toEqual({
|
||||
disabled: true,
|
||||
});
|
||||
expect(setup.setup.systemInstruction.parts[0].text).toContain(
|
||||
"examinateur",
|
||||
);
|
||||
});
|
||||
|
||||
it("injecte la relance à l'instant planifié + émet interruption_start/end et la séquence activityEnd→clientContent→activityStart", async () => {
|
||||
const client = new FakeWs();
|
||||
const gemini = new FakeWs();
|
||||
// drawCount(0.5)=1 ; planInstants(1, 0)=START_MS.
|
||||
openGeminiLiveT1Session(client, {
|
||||
reponses: REPONSES,
|
||||
clientFactory: () => gemini,
|
||||
random: seqRandom([0.5, 0]),
|
||||
});
|
||||
gemini.emit("open");
|
||||
gemini.emit("message", SETUP_COMPLETE);
|
||||
|
||||
// [0]=setup, [1]=activityStart (ouverture du 1er tour candidat).
|
||||
expect(gemini.sent).toHaveLength(2);
|
||||
expect(JSON.parse(gemini.sent[1] as string)).toEqual({
|
||||
realtimeInput: { activityStart: {} },
|
||||
});
|
||||
|
||||
// Rien ne se passe avant l'instant planifié.
|
||||
await vi.advanceTimersByTimeAsync(T1_INTERRUPTION_WINDOW_START_MS - 1);
|
||||
expect(
|
||||
clientSignals(client).some((s) => s.type === "interruption_start"),
|
||||
).toBe(false);
|
||||
|
||||
// À l'instant planifié : injection.
|
||||
await vi.advanceTimersByTimeAsync(1);
|
||||
expect(
|
||||
clientSignals(client).some((s) => s.type === "interruption_start"),
|
||||
).toBe(true);
|
||||
// [2]=activityEnd, [3]=clientContent(relance, turnComplete).
|
||||
expect(JSON.parse(gemini.sent[2] as string)).toEqual({
|
||||
realtimeInput: { activityEnd: {} },
|
||||
});
|
||||
const relance = JSON.parse(gemini.sent[3] as string);
|
||||
expect(relance.clientContent.turnComplete).toBe(true);
|
||||
expect(relance.clientContent.turns[0].role).toBe("user");
|
||||
|
||||
// Gemini termine la relance → reprise candidat.
|
||||
gemini.emit(
|
||||
"message",
|
||||
JSON.stringify({ serverContent: { turnComplete: true } }),
|
||||
);
|
||||
expect(
|
||||
clientSignals(client).some((s) => s.type === "interruption_end"),
|
||||
).toBe(true);
|
||||
// [4]=activityStart (réouverture du tour candidat).
|
||||
expect(JSON.parse(gemini.sent[4] as string)).toEqual({
|
||||
realtimeInput: { activityStart: {} },
|
||||
});
|
||||
});
|
||||
|
||||
it("FIN : envoie l'activityEnd final, garde le texte candidat final, coupe l'audio + le texte de la relance terminale", async () => {
|
||||
const client = new FakeWs();
|
||||
const gemini = new FakeWs();
|
||||
const onSessionEnd = vi.fn();
|
||||
// count=0 : aucune interruption programmée, on teste juste le flush terminal.
|
||||
openGeminiLiveT1Session(client, {
|
||||
reponses: REPONSES,
|
||||
clientFactory: () => gemini,
|
||||
random: seqRandom([0.1]),
|
||||
onSessionEnd,
|
||||
});
|
||||
gemini.emit("open");
|
||||
gemini.emit("message", SETUP_COMPLETE);
|
||||
|
||||
// Le candidat parle (message normal, forwardé verbatim).
|
||||
gemini.emit(
|
||||
"message",
|
||||
JSON.stringify({
|
||||
serverContent: {
|
||||
inputTranscription: { text: "Je m'appelle Hermann." },
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
// Fin de session demandée par le client.
|
||||
client.emit("message", JSON.stringify({ type: "end" }));
|
||||
|
||||
// activityEnd FINAL envoyé pour flusher le dernier segment candidat.
|
||||
const lastFrame = JSON.parse(gemini.sent[gemini.sent.length - 1] as string);
|
||||
expect(lastFrame).toEqual({ realtimeInput: { activityEnd: {} } });
|
||||
|
||||
// POINT DE VIGILANCE : un SEUL message Gemini pendant le flush terminal
|
||||
// contient À LA FOIS le texte candidat final (à GARDER) ET la relance
|
||||
// terminale examinateur — audio + texte (à COUPER).
|
||||
const clientSentBefore = client.sent.length;
|
||||
gemini.emit(
|
||||
"message",
|
||||
JSON.stringify({
|
||||
serverContent: {
|
||||
inputTranscription: { text: " ma ville préférée." },
|
||||
outputTranscription: { text: "Quelle est votre ville préférée ?" },
|
||||
modelTurn: {
|
||||
parts: [{ inlineData: { data: "AAAA", mimeType: "audio/pcm" } }],
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
// Ce message n'est PAS forwardé au client (audio relance terminale coupé).
|
||||
expect(client.sent.length).toBe(clientSentBefore);
|
||||
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
expect(onSessionEnd).toHaveBeenCalledTimes(1);
|
||||
const transcript = onSessionEnd.mock.calls[0][0] as string;
|
||||
// Texte candidat final BIEN conservé.
|
||||
expect(transcript).toContain("Je m'appelle Hermann.");
|
||||
expect(transcript).toContain("ma ville préférée.");
|
||||
// Texte de la relance terminale examinateur JETÉ.
|
||||
expect(transcript).not.toContain("Quelle est votre ville préférée ?");
|
||||
});
|
||||
|
||||
it("idempotence de end : un double signal end ne casse pas (un seul onSessionEnd)", async () => {
|
||||
const client = new FakeWs();
|
||||
const gemini = new FakeWs();
|
||||
const onSessionEnd = vi.fn();
|
||||
openGeminiLiveT1Session(client, {
|
||||
reponses: REPONSES,
|
||||
clientFactory: () => gemini,
|
||||
random: seqRandom([0.1]),
|
||||
onSessionEnd,
|
||||
});
|
||||
gemini.emit("open");
|
||||
gemini.emit("message", SETUP_COMPLETE);
|
||||
|
||||
client.emit("message", JSON.stringify({ type: "end" }));
|
||||
client.emit("message", JSON.stringify({ type: "end" }));
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
expect(onSessionEnd).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("absence de GEMINI_API_KEY → close client 4005 GEMINI_CONFIG sans factory", () => {
|
||||
delete process.env.GEMINI_API_KEY;
|
||||
const client = new FakeWs();
|
||||
const factory = vi.fn(() => new FakeWs());
|
||||
|
||||
openGeminiLiveT1Session(client, {
|
||||
reponses: REPONSES,
|
||||
clientFactory: factory,
|
||||
});
|
||||
|
||||
expect(factory).not.toHaveBeenCalled();
|
||||
expect(client.closed).toBe(true);
|
||||
expect(client.closeCode).toBe(4005);
|
||||
expect(client.closeReason).toBe("GEMINI_CONFIG");
|
||||
});
|
||||
});
|
||||
|
|
@ -97,7 +97,7 @@ export interface OpenGeminiLiveSessionOptions {
|
|||
/**
|
||||
* Forme minimale d'un message Gemini Live JSON entrant.
|
||||
*/
|
||||
interface GeminiServerMessage {
|
||||
export interface GeminiServerMessage {
|
||||
setupComplete?: unknown;
|
||||
serverContent?: {
|
||||
modelTurn?: {
|
||||
|
|
@ -112,12 +112,12 @@ interface GeminiServerMessage {
|
|||
};
|
||||
}
|
||||
|
||||
interface TranscriptEntry {
|
||||
export interface TranscriptEntry {
|
||||
speaker: "candidat" | "examinateur";
|
||||
text: string;
|
||||
}
|
||||
|
||||
function reconstructTranscript(entries: TranscriptEntry[]): string {
|
||||
export function reconstructTranscript(entries: TranscriptEntry[]): string {
|
||||
return entries
|
||||
.map((e) =>
|
||||
e.speaker === "candidat"
|
||||
|
|
@ -130,7 +130,7 @@ function reconstructTranscript(entries: TranscriptEntry[]): string {
|
|||
/**
|
||||
* Détecte un signal de fin de session envoyé par le client : `{type:'end'}`.
|
||||
*/
|
||||
function isEndSignal(data: unknown): boolean {
|
||||
export function isEndSignal(data: unknown): boolean {
|
||||
let text: string;
|
||||
if (typeof data === "string") {
|
||||
text = data;
|
||||
|
|
@ -156,7 +156,7 @@ function isEndSignal(data: unknown): boolean {
|
|||
* Parse un message client `{type:'audio', data: base64}` et renvoie le base64
|
||||
* si le format est valide, sinon null.
|
||||
*/
|
||||
function parseAudioChunk(data: unknown): string | null {
|
||||
export function parseAudioChunk(data: unknown): string | null {
|
||||
let text: string;
|
||||
if (typeof data === "string") {
|
||||
text = data;
|
||||
|
|
@ -184,7 +184,7 @@ function parseAudioChunk(data: unknown): string | null {
|
|||
/**
|
||||
* Tente de parser un message Gemini en JSON. Retourne null si binaire / non-JSON.
|
||||
*/
|
||||
function tryParseGeminiJson(data: unknown): GeminiServerMessage | null {
|
||||
export function tryParseGeminiJson(data: unknown): GeminiServerMessage | null {
|
||||
let text: string;
|
||||
if (typeof data === "string") {
|
||||
text = data;
|
||||
|
|
@ -213,12 +213,32 @@ function tryParseGeminiJson(data: unknown): GeminiServerMessage | null {
|
|||
}
|
||||
|
||||
/**
|
||||
* Construit le setup frame Gemini Live : model + responseModalities AUDIO,
|
||||
* systemInstruction (prompt T2), input/outputAudioTranscription, et
|
||||
* realtimeInputConfig.automaticActivityDetection (VAD : START/END_SENSITIVITY_LOW,
|
||||
* 2 s de silence avant que l'IA réponde — cf. IMPLEMENTATION_T2_LIVE.md §3).
|
||||
* VAD automatique par défaut (T2 Live) : START/END_SENSITIVITY_LOW, 2 s de
|
||||
* silence avant que l'IA réponde — cf. IMPLEMENTATION_T2_LIVE.md §3.
|
||||
*/
|
||||
function buildSetupFrame(systemPrompt: string): string {
|
||||
export const T2_AUTOMATIC_ACTIVITY_DETECTION = {
|
||||
disabled: false,
|
||||
startOfSpeechSensitivity: "START_SENSITIVITY_LOW",
|
||||
endOfSpeechSensitivity: "END_SENSITIVITY_LOW",
|
||||
silenceDurationMs: 2000,
|
||||
} as const;
|
||||
|
||||
/**
|
||||
* Construit le setup frame Gemini Live : model + responseModalities AUDIO,
|
||||
* systemInstruction, input/outputAudioTranscription, et
|
||||
* realtimeInputConfig.automaticActivityDetection.
|
||||
*
|
||||
* `automaticActivityDetection` est paramétrable (défaut = VAD T2 inchangé).
|
||||
* T1 Live (VAD manuel) passera `{ disabled: true }` pour piloter les bornes de
|
||||
* tour côté backend (activityStart / activityEnd).
|
||||
*/
|
||||
export function buildSetupFrame(
|
||||
systemPrompt: string,
|
||||
automaticActivityDetection: Record<
|
||||
string,
|
||||
unknown
|
||||
> = T2_AUTOMATIC_ACTIVITY_DETECTION,
|
||||
): string {
|
||||
return JSON.stringify({
|
||||
setup: {
|
||||
model: `models/${GEMINI_LIVE_MODEL}`,
|
||||
|
|
@ -231,12 +251,7 @@ function buildSetupFrame(systemPrompt: string): string {
|
|||
inputAudioTranscription: {},
|
||||
outputAudioTranscription: {},
|
||||
realtimeInputConfig: {
|
||||
automaticActivityDetection: {
|
||||
disabled: false,
|
||||
startOfSpeechSensitivity: "START_SENSITIVITY_LOW",
|
||||
endOfSpeechSensitivity: "END_SENSITIVITY_LOW",
|
||||
silenceDurationMs: 2000,
|
||||
},
|
||||
automaticActivityDetection,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
|
|
|||
487
src/lib/geminiLiveT1.ts
Normal file
487
src/lib/geminiLiveT1.ts
Normal file
|
|
@ -0,0 +1,487 @@
|
|||
/**
|
||||
* geminiLiveT1.ts — Sprint 7a (T1 EO Live, examinateur avec interruption
|
||||
* pilotée par le BACKEND).
|
||||
*
|
||||
* Ce module porte la spécificité T1 :
|
||||
* - buildT1SystemPrompt : le prompt système de l'examinateur ;
|
||||
* - openGeminiLiveT1Session : le proxy WS + l'horloge probabiliste qui décide
|
||||
* QUAND interrompre, et l'injection de la relance (clientContent).
|
||||
*
|
||||
* Les helpers WS bas niveau (parseAudioChunk, isEndSignal, tryParseGeminiJson,
|
||||
* reconstructTranscript) et le setup frame paramétrable (buildSetupFrame) sont
|
||||
* réutilisés depuis `geminiLive.ts` (exports additifs Sprint 7a).
|
||||
*
|
||||
* ⚠ Différence fondamentale avec T2 : en T1, l'examinateur DOIT poser des
|
||||
* questions pour relancer le candidat. La règle 7 du T2 (interdiction absolue
|
||||
* de poser des questions / bannissement du point d'interrogation) NE DOIT
|
||||
* JAMAIS être propagée ici. Cf. TD-22 / TD-23.
|
||||
*
|
||||
* MODÈLE 1 (acté) : c'est l'HORLOGE PROBABILISTE du backend qui décide seule du
|
||||
* timing des interruptions. Le backend NE lit PAS la transcription partielle
|
||||
* pour décider — Gemini formule la relance à partir de son contexte audio
|
||||
* interne. (Découverte spike : en VAD manuel, inputTranscription n'est flushé
|
||||
* qu'à l'envoi d'activityEnd, pas en continu.)
|
||||
*/
|
||||
|
||||
import { WebSocket as NodeWebSocket } from "ws";
|
||||
import type { PresentationReponses } from "../controllers/presentationController.js";
|
||||
import {
|
||||
GEMINI_LIVE_URL,
|
||||
buildSetupFrame,
|
||||
isEndSignal,
|
||||
parseAudioChunk,
|
||||
reconstructTranscript,
|
||||
tryParseGeminiJson,
|
||||
type TranscriptEntry,
|
||||
type WebSocketLike,
|
||||
} from "./geminiLive.js";
|
||||
|
||||
/**
|
||||
* Construit le prompt système T1 Live à partir des réponses du questionnaire
|
||||
* candidat (transmises dynamiquement — il n'existe pas de sujet T1 en base).
|
||||
*
|
||||
* Le prompt définit le RÔLE de l'examinateur : il reste silencieux par défaut
|
||||
* et ne prend la parole QUE lorsque le backend le lui signale (injection
|
||||
* `clientContent` au moment choisi par l'horloge probabiliste). C'est le
|
||||
* BACKEND qui décide du TIMING ; l'examinateur, lui, formule librement une
|
||||
* relance courte à partir de son contexte audio interne.
|
||||
*/
|
||||
export function buildT1SystemPrompt(input: {
|
||||
reponses: PresentationReponses;
|
||||
}): string {
|
||||
const { reponses } = input;
|
||||
return `RÔLE : Tu es un examinateur bienveillant de l'épreuve d'Expression Orale du TCF Canada (Tâche 1, entretien dirigé). Le candidat se présente en monologue : identité, parcours, situation familiale, loisirs, et projet d'immigration au Canada.
|
||||
|
||||
CONTEXTE DU CANDIDAT (pour formuler des relances pertinentes et personnalisées) :
|
||||
- Identité : ${reponses.prenom_age_ville}
|
||||
- Formation / métier : ${reponses.formation_metier}
|
||||
- Situation familiale : ${reponses.situation_familiale}
|
||||
- Loisirs : ${reponses.loisirs}
|
||||
- Projet Canada : ${reponses.motivation_canada}
|
||||
|
||||
RÈGLES :
|
||||
1. Tu parles TOUJOURS en français naturel et courant, niveau B2-C1, sur un ton bienveillant et professionnel.
|
||||
2. Tu RESTES SILENCIEUX par défaut. Tant que le candidat parle, tu n'interviens JAMAIS de ta propre initiative.
|
||||
3. Tu prends la parole UNIQUEMENT lorsqu'on te le signale, et alors UNIQUEMENT pour relancer le candidat par UNE question.
|
||||
4. Ta relance est COURTE : une seule question de 10 à 20 mots, liée à ce que le candidat vient de dire ou à son contexte ci-dessus.
|
||||
5. Tu PEUX et tu DOIS poser des questions : c'est le cœur de ton rôle d'examinateur en Tâche 1. Utilise le point d'interrogation normalement.
|
||||
6. Une seule question à la fois. Jamais de liste, jamais d'enchaînement de plusieurs questions dans la même prise de parole.
|
||||
7. Tu ne corriges JAMAIS les erreurs du candidat et tu ne commentes jamais sa langue, ses erreurs ou sa performance.
|
||||
8. Tu restes toujours dans ton rôle d'examinateur. Tu ne mentionnes jamais que tu es une IA ou un modèle.`;
|
||||
}
|
||||
|
||||
// ── Constantes nommées (PAS de nombres magiques) ────────────────────────────
|
||||
|
||||
/** Timeout total de la session T1 Live (filet de sécurité). */
|
||||
export const T1_SESSION_TIMEOUT_MS = 180_000;
|
||||
/** Warning client : 30 s avant le timeout. */
|
||||
export const T1_SESSION_WARNING_MS = 150_000;
|
||||
|
||||
/** Distribution du nombre d'interruptions tirées au début de session. */
|
||||
export const T1_INTERRUPTION_P0 = 0.2; // P(0 interruption)
|
||||
export const T1_INTERRUPTION_P1 = 0.6; // P(1 interruption)
|
||||
export const T1_INTERRUPTION_P2 = 0.2; // P(2 interruptions)
|
||||
|
||||
/** Fenêtre temporelle (depuis le début de session) où placer les interruptions. */
|
||||
export const T1_INTERRUPTION_WINDOW_START_MS = 25_000;
|
||||
export const T1_INTERRUPTION_WINDOW_END_MS = 75_000;
|
||||
/** Espacement minimal garanti entre deux interruptions. */
|
||||
export const T1_INTERRUPTION_MIN_SPACING_MS = 20_000;
|
||||
|
||||
/**
|
||||
* Délai d'attente, après l'activityEnd FINAL, pour laisser Gemini flusher la
|
||||
* transcription du dernier segment candidat avant de finaliser la session.
|
||||
*/
|
||||
export const T1_TERMINAL_FLUSH_GRACE_MS = 3_000;
|
||||
|
||||
/** MIME du flux audio candidat (PCM 16 kHz mono), identique au T2. */
|
||||
const T1_INPUT_AUDIO_MIME = "audio/pcm;rate=16000";
|
||||
|
||||
/** VAD MANUEL : c'est le backend qui borne les tours (activityStart/End). */
|
||||
const T1_MANUAL_VAD = { disabled: true } as const;
|
||||
|
||||
/** Consigne interne injectée pour déclencher une relance (jamais lue à voix haute). */
|
||||
const T1_RELANCE_INSTRUCTION =
|
||||
"[CONSIGNE INTERNE — ne pas répéter] Interromps maintenant le candidat avec UNE seule question de relance courte et pertinente, liée à ce qu'il vient de dire.";
|
||||
|
||||
const ACTIVITY_START_FRAME = JSON.stringify({
|
||||
realtimeInput: { activityStart: {} },
|
||||
});
|
||||
const ACTIVITY_END_FRAME = JSON.stringify({
|
||||
realtimeInput: { activityEnd: {} },
|
||||
});
|
||||
|
||||
function buildRelanceFrame(): string {
|
||||
return JSON.stringify({
|
||||
clientContent: {
|
||||
turns: [{ role: "user", parts: [{ text: T1_RELANCE_INSTRUCTION }] }],
|
||||
turnComplete: true,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// ── Logique probabiliste (fonctions pures, testables avec random injecté) ────
|
||||
|
||||
/**
|
||||
* Tire le nombre d'interruptions de la session selon la distribution
|
||||
* P0/P1/P2. `random()` ∈ [0,1).
|
||||
*/
|
||||
export function drawT1InterruptionCount(random: () => number): 0 | 1 | 2 {
|
||||
const r = random();
|
||||
if (r < T1_INTERRUPTION_P0) return 0;
|
||||
if (r < T1_INTERRUPTION_P0 + T1_INTERRUPTION_P1) return 1;
|
||||
return 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Planifie les instants (offsets ms depuis le début de session) des
|
||||
* interruptions dans la fenêtre [START, END], avec un espacement minimal
|
||||
* garanti de MIN_SPACING entre deux interruptions.
|
||||
*/
|
||||
export function planT1InterruptionInstants(
|
||||
count: 0 | 1 | 2,
|
||||
random: () => number,
|
||||
): number[] {
|
||||
const start = T1_INTERRUPTION_WINDOW_START_MS;
|
||||
const end = T1_INTERRUPTION_WINDOW_END_MS;
|
||||
const spacing = T1_INTERRUPTION_MIN_SPACING_MS;
|
||||
|
||||
if (count === 0) return [];
|
||||
if (count === 1) {
|
||||
return [start + random() * (end - start)];
|
||||
}
|
||||
// count === 2 : premier dans [start, end - spacing], second au moins
|
||||
// `spacing` après le premier et au plus `end`.
|
||||
const first = start + random() * (end - spacing - start);
|
||||
const second = first + spacing + random() * (end - (first + spacing));
|
||||
return [first, second];
|
||||
}
|
||||
|
||||
// ── Options de session ───────────────────────────────────────────────────────
|
||||
|
||||
export interface OpenGeminiLiveT1SessionOptions {
|
||||
/** Réponses du questionnaire candidat (contexte du prompt T1). */
|
||||
reponses: PresentationReponses;
|
||||
/** Callback de fin de session avec le transcript reconstruit. */
|
||||
onSessionEnd?: (transcript: string) => void | Promise<void>;
|
||||
/** Override timeout (défaut T1_SESSION_TIMEOUT_MS). */
|
||||
timeoutMs?: number;
|
||||
/** Override warning (défaut T1_SESSION_WARNING_MS). */
|
||||
warningMs?: number;
|
||||
/** Surcharge la clé API (défaut process.env.GEMINI_API_KEY). */
|
||||
apiKey?: string;
|
||||
/** Injection pour les tests — fabrique de WebSocket vers Gemini. */
|
||||
clientFactory?: (url: string) => WebSocketLike;
|
||||
/** Source d'aléa injectable (défaut Math.random) pour la testabilité. */
|
||||
random?: () => number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ouvre une session T1 Live : proxy WS bidirectionnel client ⇄ Gemini en VAD
|
||||
* MANUEL, avec interruption(s) injectée(s) au(x) instant(s) tiré(s) par
|
||||
* l'horloge probabiliste.
|
||||
*
|
||||
* Contrat WS côté client (figé — la suite du sprint 7b en dépend) :
|
||||
* - {type:'interruption_start'} : l'examinateur prend la parole ;
|
||||
* - {type:'interruption_end'} : le candidat peut reprendre.
|
||||
*
|
||||
* Séquence d'une interruption (Modèle 1) :
|
||||
* activityEnd → clientContent(relance, turnComplete) → (turnComplete Gemini)
|
||||
* → activityStart.
|
||||
*
|
||||
* FIN DE SESSION : on envoie un activityEnd FINAL pour flusher le dernier
|
||||
* segment candidat (sinon perdu — la transcription n'est flushée qu'à
|
||||
* activityEnd en VAD manuel). Cet activityEnd déclenche AUSSI une relance
|
||||
* examinateur « terminale » : on la SUPPRIME (audio non forwardé au client,
|
||||
* texte jeté). Cf. point de vigilance dans le handler de message.
|
||||
*/
|
||||
export function openGeminiLiveT1Session(
|
||||
clientWs: WebSocketLike,
|
||||
opts: OpenGeminiLiveT1SessionOptions,
|
||||
): void {
|
||||
const apiKey = opts.apiKey ?? process.env.GEMINI_API_KEY;
|
||||
if (!apiKey) {
|
||||
clientWs.close(4005, "GEMINI_CONFIG");
|
||||
return;
|
||||
}
|
||||
|
||||
const timeoutMs = opts.timeoutMs ?? T1_SESSION_TIMEOUT_MS;
|
||||
const warningMs = opts.warningMs ?? T1_SESSION_WARNING_MS;
|
||||
const random = opts.random ?? Math.random;
|
||||
const systemPrompt = buildT1SystemPrompt({ reponses: opts.reponses });
|
||||
|
||||
const url = `${GEMINI_LIVE_URL}?key=${apiKey}`;
|
||||
const factory =
|
||||
opts.clientFactory ??
|
||||
((u: string) => new NodeWebSocket(u) as unknown as WebSocketLike);
|
||||
|
||||
const geminiWs = factory(url);
|
||||
|
||||
const entries: TranscriptEntry[] = [];
|
||||
|
||||
// ── État ──
|
||||
let started = false; // startSession() exécuté une seule fois
|
||||
let sessionEnded = false; // endSession() entamé (idempotence)
|
||||
let finalized = false; // finalize() exécuté une seule fois
|
||||
let candidateTurnOpen = false; // un tour candidat est ouvert côté Gemini
|
||||
let injecting = false; // une interruption est en cours
|
||||
let awaitingRelance = false; // on attend le turnComplete de la relance
|
||||
// terminalFlush : on a envoyé l'activityEnd FINAL. À partir de là, l'audio et
|
||||
// le texte de l'examinateur (relance terminale) sont SUPPRIMÉS ; seule la
|
||||
// transcription candidat reste collectée.
|
||||
let terminalFlush = false;
|
||||
|
||||
const interruptionTimers: ReturnType<typeof setTimeout>[] = [];
|
||||
let warningTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
let finalizeTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
|
||||
const clearTimers = () => {
|
||||
for (const t of interruptionTimers) clearTimeout(t);
|
||||
interruptionTimers.length = 0;
|
||||
if (warningTimer !== null) {
|
||||
clearTimeout(warningTimer);
|
||||
warningTimer = null;
|
||||
}
|
||||
if (timeoutTimer !== null) {
|
||||
clearTimeout(timeoutTimer);
|
||||
timeoutTimer = null;
|
||||
}
|
||||
};
|
||||
|
||||
const geminiSend = (frame: string) => {
|
||||
try {
|
||||
geminiWs.send(frame);
|
||||
} catch (err) {
|
||||
console.error(
|
||||
"[T1] Gemini send failed:",
|
||||
err instanceof Error ? err.message : String(err),
|
||||
);
|
||||
void endSession();
|
||||
}
|
||||
};
|
||||
|
||||
const clientSend = (obj: unknown) => {
|
||||
try {
|
||||
clientWs.send(JSON.stringify(obj));
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
};
|
||||
|
||||
// ── Injection d'une interruption ──
|
||||
const doInterruption = () => {
|
||||
if (sessionEnded || terminalFlush || injecting || !candidateTurnOpen)
|
||||
return;
|
||||
injecting = true;
|
||||
awaitingRelance = true;
|
||||
candidateTurnOpen = false;
|
||||
clientSend({ type: "interruption_start" });
|
||||
geminiSend(ACTIVITY_END_FRAME);
|
||||
geminiSend(buildRelanceFrame());
|
||||
};
|
||||
|
||||
const resumeAfterInjection = () => {
|
||||
awaitingRelance = false;
|
||||
injecting = false;
|
||||
geminiSend(ACTIVITY_START_FRAME);
|
||||
candidateTurnOpen = true;
|
||||
clientSend({ type: "interruption_end" });
|
||||
};
|
||||
|
||||
// ── Démarrage (sur setupComplete) ──
|
||||
const startSession = () => {
|
||||
if (started) return;
|
||||
started = true;
|
||||
|
||||
// Ouvre le premier tour candidat.
|
||||
geminiSend(ACTIVITY_START_FRAME);
|
||||
candidateTurnOpen = true;
|
||||
|
||||
// Tire et planifie les interruptions.
|
||||
const count = drawT1InterruptionCount(random);
|
||||
const instants = planT1InterruptionInstants(count, random);
|
||||
for (const offset of instants) {
|
||||
interruptionTimers.push(setTimeout(() => doInterruption(), offset));
|
||||
}
|
||||
|
||||
warningTimer = setTimeout(() => {
|
||||
if (sessionEnded) return;
|
||||
clientSend({ type: "warning", message: "30 secondes restantes" });
|
||||
}, warningMs);
|
||||
|
||||
timeoutTimer = setTimeout(() => {
|
||||
void endSession();
|
||||
}, timeoutMs);
|
||||
};
|
||||
|
||||
const finalize = async () => {
|
||||
if (finalized) return;
|
||||
finalized = true;
|
||||
try {
|
||||
geminiWs.close(1000);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
if (opts.onSessionEnd) {
|
||||
try {
|
||||
await opts.onSessionEnd(reconstructTranscript(entries));
|
||||
} catch (err) {
|
||||
console.error(
|
||||
"[T1] onSessionEnd threw:",
|
||||
err instanceof Error ? err.message : String(err),
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// endSession est idempotent : double signal end → un seul flush + finalize.
|
||||
async function endSession() {
|
||||
if (sessionEnded) return;
|
||||
sessionEnded = true;
|
||||
clearTimers();
|
||||
terminalFlush = true;
|
||||
// Flush du dernier segment candidat : indispensable car en VAD manuel la
|
||||
// transcription candidat n'est émise qu'à l'activityEnd.
|
||||
if (candidateTurnOpen) {
|
||||
geminiSend(ACTIVITY_END_FRAME);
|
||||
candidateTurnOpen = false;
|
||||
}
|
||||
// Laisse à Gemini le temps d'émettre l'inputTranscription flushée, puis
|
||||
// finalise (la relance terminale éventuelle est ignorée — cf. handler).
|
||||
finalizeTimer = setTimeout(
|
||||
() => void finalize(),
|
||||
T1_TERMINAL_FLUSH_GRACE_MS,
|
||||
);
|
||||
}
|
||||
|
||||
// ── Gemini → client ──
|
||||
geminiWs.on("open", () => {
|
||||
geminiSend(buildSetupFrame(systemPrompt, T1_MANUAL_VAD));
|
||||
});
|
||||
|
||||
geminiWs.on("message", (data) => {
|
||||
const parsed = tryParseGeminiJson(data);
|
||||
|
||||
if (parsed?.setupComplete) {
|
||||
startSession();
|
||||
}
|
||||
|
||||
if (parsed) {
|
||||
const sc = parsed.serverContent;
|
||||
|
||||
// POINT DE VIGILANCE — séparation "audio relance terminale à couper" vs
|
||||
// "texte candidat final à garder" quand ils arrivent dans le MÊME
|
||||
// message Gemini : on traite CHAMP PAR CHAMP, pas message par message.
|
||||
// - serverContent.inputTranscription.text = CANDIDAT → toujours gardé,
|
||||
// y compris pendant le flush terminal (c'est précisément ce qu'on veut
|
||||
// récupérer).
|
||||
// - serverContent.outputTranscription.text = EXAMINATEUR → ignoré
|
||||
// pendant le flush terminal (relance terminale jetée).
|
||||
// - serverContent.modelTurn.*.inlineData = audio EXAMINATEUR → non
|
||||
// forwardé au client pendant le flush terminal (cf. plus bas).
|
||||
if (sc?.inputTranscription?.text) {
|
||||
entries.push({ speaker: "candidat", text: sc.inputTranscription.text });
|
||||
}
|
||||
if (!terminalFlush && sc?.outputTranscription?.text) {
|
||||
entries.push({
|
||||
speaker: "examinateur",
|
||||
text: sc.outputTranscription.text,
|
||||
});
|
||||
}
|
||||
|
||||
// Reprise candidat après la relance (jamais pendant le flush terminal :
|
||||
// on ne rouvre pas de tour, la session se termine).
|
||||
if (sc?.turnComplete && injecting && awaitingRelance && !terminalFlush) {
|
||||
resumeAfterInjection();
|
||||
}
|
||||
}
|
||||
|
||||
// Forward verbatim au client SAUF pendant le flush terminal : ainsi l'audio
|
||||
// de la relance terminale (modelTurn inlineData) n'est jamais entendu par
|
||||
// le candidat.
|
||||
if (!terminalFlush) {
|
||||
try {
|
||||
clientWs.send(data);
|
||||
} catch {
|
||||
void endSession();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
geminiWs.on("close", () => {
|
||||
if (!sessionEnded) {
|
||||
clearTimers();
|
||||
sessionEnded = true;
|
||||
try {
|
||||
clientWs.close(4006, "GEMINI_DISCONNECTED");
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
geminiWs.on("error", () => {
|
||||
if (!sessionEnded) {
|
||||
clearTimers();
|
||||
sessionEnded = true;
|
||||
try {
|
||||
clientWs.close(4006, "GEMINI_DISCONNECTED");
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// ── Client → Gemini ──
|
||||
clientWs.on("message", (data) => {
|
||||
if (isEndSignal(data)) {
|
||||
void endSession();
|
||||
return;
|
||||
}
|
||||
const audioBase64 = parseAudioChunk(data);
|
||||
if (
|
||||
audioBase64 !== null &&
|
||||
!sessionEnded &&
|
||||
candidateTurnOpen &&
|
||||
!injecting
|
||||
) {
|
||||
geminiSend(
|
||||
JSON.stringify({
|
||||
realtimeInput: {
|
||||
audio: { data: audioBase64, mimeType: T1_INPUT_AUDIO_MIME },
|
||||
},
|
||||
}),
|
||||
);
|
||||
}
|
||||
// Tout autre message client est ignoré.
|
||||
});
|
||||
|
||||
clientWs.on("close", () => {
|
||||
clearTimers();
|
||||
if (finalizeTimer !== null) {
|
||||
clearTimeout(finalizeTimer);
|
||||
finalizeTimer = null;
|
||||
}
|
||||
sessionEnded = true;
|
||||
try {
|
||||
geminiWs.close(1000);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
});
|
||||
|
||||
clientWs.on("error", () => {
|
||||
clearTimers();
|
||||
if (finalizeTimer !== null) {
|
||||
clearTimeout(finalizeTimer);
|
||||
finalizeTimer = null;
|
||||
}
|
||||
sessionEnded = true;
|
||||
try {
|
||||
geminiWs.close(1011);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
});
|
||||
}
|
||||
238
src/routes/__tests__/t1live.test.ts
Normal file
238
src/routes/__tests__/t1live.test.ts
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { EventEmitter } from "node:events";
|
||||
|
||||
// ─── Mocks ───────────────────────────────────────────────────────────────────
|
||||
|
||||
vi.mock("../../lib/supabase", () => ({
|
||||
supabase: {
|
||||
auth: {
|
||||
getUser: vi.fn(),
|
||||
},
|
||||
from: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("../../lib/deepseek", async () => {
|
||||
const actual =
|
||||
await vi.importActual<typeof import("../../lib/deepseek")>(
|
||||
"../../lib/deepseek",
|
||||
);
|
||||
return {
|
||||
...actual,
|
||||
correctEO: vi.fn(),
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("../../lib/geminiPhonology", () => ({
|
||||
PHONOLOGY_STUB: {
|
||||
score: 2,
|
||||
commentaire: "Stub",
|
||||
note_phonologie: "Stub",
|
||||
},
|
||||
}));
|
||||
|
||||
import { supabase } from "../../lib/supabase";
|
||||
import { correctEO as deepseekCorrectEO } from "../../lib/deepseek";
|
||||
import { parseT1Context, runT1LiveCorrection } from "../t1live";
|
||||
import type { WebSocketLike } from "../../lib/geminiLive";
|
||||
|
||||
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
class FakeWs extends EventEmitter implements WebSocketLike {
|
||||
public sent: unknown[] = [];
|
||||
public closed = false;
|
||||
public closeCode?: number;
|
||||
public closeReason?: string;
|
||||
send(data: unknown): void {
|
||||
this.sent.push(data);
|
||||
}
|
||||
close(code?: number, reason?: string): void {
|
||||
if (this.closed) return;
|
||||
this.closed = true;
|
||||
this.closeCode = code;
|
||||
this.closeReason = reason;
|
||||
}
|
||||
}
|
||||
|
||||
function mockProductionInsert(
|
||||
resultId: string | null,
|
||||
errorMsg: string | null = null,
|
||||
) {
|
||||
vi.mocked(supabase.from).mockReturnValueOnce({
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
insert: vi.fn(() => ({
|
||||
select: vi.fn(() => ({
|
||||
single: vi.fn(async () =>
|
||||
errorMsg
|
||||
? { data: null, error: { message: errorMsg } }
|
||||
: { data: { id: resultId }, error: null },
|
||||
),
|
||||
})),
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
})) as any,
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
} as any);
|
||||
}
|
||||
|
||||
function mockProductionUpdate(errorMsg: string | null = null) {
|
||||
vi.mocked(supabase.from).mockReturnValueOnce({
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
update: vi.fn(() => ({
|
||||
eq: vi.fn(async () =>
|
||||
errorMsg ? { error: { message: errorMsg } } : { error: null },
|
||||
),
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
})) as any,
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
} as any);
|
||||
}
|
||||
|
||||
const REPONSES = {
|
||||
prenom_age_ville: "Hermann, 35 ans, Lyon",
|
||||
formation_metier: "ingénieur en informatique",
|
||||
situation_familiale: "marié, deux enfants",
|
||||
loisirs: "la randonnée et la photographie",
|
||||
motivation_canada: "de meilleures opportunités professionnelles",
|
||||
};
|
||||
|
||||
const FAKE_RAPPORT = {
|
||||
score: 14,
|
||||
nclc: 8,
|
||||
nclc_cible: 9 as const,
|
||||
revelation: { croyance: "a", realite: "b", consequence: "c" },
|
||||
diagnostic: "d",
|
||||
criteres: [],
|
||||
conseil_nclc: { nclc_cible: "NCLC 9", ecart: "e", action_prioritaire: "p" },
|
||||
erreurs_codes: [],
|
||||
};
|
||||
|
||||
// ─── Tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("parseT1Context", () => {
|
||||
it("accepte un message {type:'context', reponses} valide", () => {
|
||||
const result = parseT1Context(
|
||||
JSON.stringify({ type: "context", reponses: REPONSES }),
|
||||
);
|
||||
expect(result).toEqual({ ok: true, reponses: REPONSES });
|
||||
});
|
||||
|
||||
it("refuse un message sans type 'context'", () => {
|
||||
const result = parseT1Context(
|
||||
JSON.stringify({ type: "audio", data: "AAAA" }),
|
||||
);
|
||||
expect(result).toEqual({ ok: false });
|
||||
});
|
||||
|
||||
it("refuse un contexte aux réponses invalides (champ manquant)", () => {
|
||||
const { motivation_canada: _omit, ...partiel } = REPONSES;
|
||||
const result = parseT1Context(
|
||||
JSON.stringify({ type: "context", reponses: partiel }),
|
||||
);
|
||||
expect(result).toEqual({ ok: false });
|
||||
});
|
||||
|
||||
it("refuse un payload non-JSON", () => {
|
||||
expect(parseT1Context("pas du json {")).toEqual({ ok: false });
|
||||
});
|
||||
});
|
||||
|
||||
describe("runT1LiveCorrection", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
const profile = { id: "u1", plan: "premium" as const };
|
||||
|
||||
it("transcript vide → EMPTY_TRANSCRIPT + close 1000 sans appeler DeepSeek", async () => {
|
||||
const ws = new FakeWs();
|
||||
await runT1LiveCorrection({ clientWs: ws, profile, transcript: " " });
|
||||
expect(deepseekCorrectEO).not.toHaveBeenCalled();
|
||||
expect(ws.closed).toBe(true);
|
||||
expect(ws.closeCode).toBe(1000);
|
||||
const sent = JSON.parse(ws.sent[0] as string);
|
||||
expect(sent).toMatchObject({ type: "error", code: "EMPTY_TRANSCRIPT" });
|
||||
});
|
||||
|
||||
it("flux nominal : insert EO_T1 (sujet_id null) → DeepSeek → update → report → close 1000", async () => {
|
||||
const ws = new FakeWs();
|
||||
const insertSpy = vi.fn(() => ({
|
||||
select: vi.fn(() => ({
|
||||
single: vi.fn(async () => ({ data: { id: "prod-t1" }, error: null })),
|
||||
})),
|
||||
}));
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
vi.mocked(supabase.from).mockReturnValueOnce({ insert: insertSpy } as any);
|
||||
vi.mocked(deepseekCorrectEO).mockResolvedValueOnce(FAKE_RAPPORT);
|
||||
mockProductionUpdate();
|
||||
|
||||
await runT1LiveCorrection({
|
||||
clientWs: ws,
|
||||
profile,
|
||||
transcript:
|
||||
"Candidat : Je m'appelle Hermann\nExaminateur : Où vivez-vous ?",
|
||||
});
|
||||
|
||||
// Persistance : tache EO_T1, sujet_id NULL.
|
||||
expect(insertSpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
user_id: "u1",
|
||||
tache: "EO_T1",
|
||||
sujet_id: null,
|
||||
mode: "entrainement",
|
||||
}),
|
||||
);
|
||||
// Correction : tache EO_T1, nclcCible 9, pas de consigne.
|
||||
expect(deepseekCorrectEO).toHaveBeenCalledWith(
|
||||
"Candidat : Je m'appelle Hermann\nExaminateur : Où vivez-vous ?",
|
||||
"EO_T1",
|
||||
9,
|
||||
null,
|
||||
);
|
||||
expect(ws.closed).toBe(true);
|
||||
expect(ws.closeCode).toBe(1000);
|
||||
const reportFrame = ws.sent.find(
|
||||
(f) => typeof f === "string" && f.includes('"report"'),
|
||||
);
|
||||
expect(reportFrame).toBeDefined();
|
||||
const parsed = JSON.parse(reportFrame as string);
|
||||
expect(parsed.type).toBe("report");
|
||||
// Score textuel 14 + phonologie stub 2 = 16.
|
||||
expect(parsed.data.score).toBe(16);
|
||||
expect(parsed.data.nclc).toBe(8);
|
||||
expect(parsed.data.simulation_id).toBe("prod-t1");
|
||||
});
|
||||
|
||||
it("insert production échoue → PERSISTENCE_FAILED + close 1011", async () => {
|
||||
const ws = new FakeWs();
|
||||
mockProductionInsert(null, "db down");
|
||||
|
||||
await runT1LiveCorrection({
|
||||
clientWs: ws,
|
||||
profile,
|
||||
transcript: "Candidat : Bonjour",
|
||||
});
|
||||
|
||||
expect(deepseekCorrectEO).not.toHaveBeenCalled();
|
||||
expect(ws.closed).toBe(true);
|
||||
expect(ws.closeCode).toBe(1011);
|
||||
const sent = JSON.parse(ws.sent[0] as string);
|
||||
expect(sent.code).toBe("PERSISTENCE_FAILED");
|
||||
});
|
||||
|
||||
it("DeepSeek throw → CORRECTION_FAILED + close 1011", async () => {
|
||||
const ws = new FakeWs();
|
||||
mockProductionInsert("prod-t1");
|
||||
vi.mocked(deepseekCorrectEO).mockRejectedValueOnce(new Error("timeout"));
|
||||
|
||||
await runT1LiveCorrection({
|
||||
clientWs: ws,
|
||||
profile,
|
||||
transcript: "Candidat : Bonjour",
|
||||
});
|
||||
|
||||
expect(ws.closed).toBe(true);
|
||||
expect(ws.closeCode).toBe(1011);
|
||||
const sent = JSON.parse(ws.sent[0] as string);
|
||||
expect(sent.code).toBe("CORRECTION_FAILED");
|
||||
});
|
||||
});
|
||||
322
src/routes/t1live.ts
Normal file
322
src/routes/t1live.ts
Normal file
|
|
@ -0,0 +1,322 @@
|
|||
import { Hono } from "hono";
|
||||
import type { UpgradeWebSocket } from "hono/ws";
|
||||
import { EventEmitter } from "node:events";
|
||||
import { supabase } from "../lib/supabase.js";
|
||||
import type { Plan } from "../lib/access.js";
|
||||
import { correctEO as deepseekCorrectEO } from "../lib/deepseek.js";
|
||||
import { PHONOLOGY_STUB } from "../lib/geminiPhonology.js";
|
||||
import {
|
||||
validateReponses,
|
||||
type PresentationReponses,
|
||||
} from "../controllers/presentationController.js";
|
||||
import {
|
||||
openGeminiLiveT1Session,
|
||||
type OpenGeminiLiveT1SessionOptions,
|
||||
} from "../lib/geminiLiveT1.js";
|
||||
import type { WebSocketLike } from "../lib/geminiLive.js";
|
||||
// RÉUTILISATION : même gate d'authentification + permission Premium que le T2.
|
||||
// `authenticate` vérifie le JWT Supabase puis checkFeatureAccess(plan, 'oral_t2_live').
|
||||
import { authenticate } from "./t2live.js";
|
||||
|
||||
interface Profile {
|
||||
id: string;
|
||||
plan: Plan;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse et valide le 1er message attendu sur la socket T1 : `{type:'context',
|
||||
* reponses}`. Les réponses sont validées via `validateReponses` (réutilisée du
|
||||
* contrôleur de présentation — T1 EO n'est PAS subject-based, le contexte vient
|
||||
* du questionnaire candidat, pas d'un sujet en base).
|
||||
*/
|
||||
export function parseT1Context(
|
||||
data: unknown,
|
||||
): { ok: true; reponses: PresentationReponses } | { ok: false } {
|
||||
let parsed: unknown;
|
||||
if (typeof data === "string") {
|
||||
try {
|
||||
parsed = JSON.parse(data);
|
||||
} catch {
|
||||
return { ok: false };
|
||||
}
|
||||
} else if (data !== null && typeof data === "object") {
|
||||
parsed = data;
|
||||
} else {
|
||||
return { ok: false };
|
||||
}
|
||||
|
||||
if (parsed === null || typeof parsed !== "object") return { ok: false };
|
||||
const msg = parsed as Record<string, unknown>;
|
||||
if (msg.type !== "context") return { ok: false };
|
||||
|
||||
const validation = validateReponses(msg.reponses);
|
||||
if ("error" in validation) return { ok: false };
|
||||
return { ok: true, reponses: validation.reponses };
|
||||
}
|
||||
|
||||
/**
|
||||
* Pipeline post-session T1 : crée la production, lance la correction EO sur le
|
||||
* transcript reconstruit, persiste le rapport, envoie au client puis ferme.
|
||||
*
|
||||
* Calqué sur runT2LiveCorrection (t2live.ts) mais spécifique T1 :
|
||||
* - tache='EO_T1' (enum DB existant — aucune migration, pas de EO_T1_LIVE) ;
|
||||
* - sujet_id=null (T1 EO non subject-based — déjà fait par le flux batch
|
||||
* EO_T1, cf. simulationController) ;
|
||||
* - correctEO(transcript, 'EO_T1', 9, null) : pas de consigne de sujet en T1 ;
|
||||
* - phonologie = PHONOLOGY_STUB (TD-08 — pas d'audio brut côté backend).
|
||||
*/
|
||||
export async function runT1LiveCorrection(args: {
|
||||
clientWs: WebSocketLike;
|
||||
profile: Profile;
|
||||
transcript: string;
|
||||
}): Promise<void> {
|
||||
const { clientWs, profile, transcript } = args;
|
||||
|
||||
if (transcript.trim().length === 0) {
|
||||
try {
|
||||
clientWs.send(
|
||||
JSON.stringify({
|
||||
type: "error",
|
||||
code: "EMPTY_TRANSCRIPT",
|
||||
message: "Aucun échange enregistré.",
|
||||
}),
|
||||
);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
clientWs.close(1000, "EMPTY_TRANSCRIPT");
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// 1. Créer la production (rapport=null pour l'instant).
|
||||
const { data: created, error: insertError } = await supabase
|
||||
.from("productions")
|
||||
.insert({
|
||||
user_id: profile.id,
|
||||
tache: "EO_T1",
|
||||
mode: "entrainement",
|
||||
sujet_id: null,
|
||||
contenu: transcript,
|
||||
})
|
||||
.select("id")
|
||||
.single();
|
||||
|
||||
if (insertError || !created) {
|
||||
console.error("[T1] production insert failed:", insertError?.message);
|
||||
try {
|
||||
clientWs.send(
|
||||
JSON.stringify({
|
||||
type: "error",
|
||||
code: "PERSISTENCE_FAILED",
|
||||
message: "Impossible d'enregistrer la session.",
|
||||
}),
|
||||
);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
clientWs.close(1011, "PERSISTENCE_FAILED");
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const productionId = (created as { id: string }).id;
|
||||
|
||||
// 2. Lancer la correction EO via DeepSeek (pas de consigne de sujet en T1).
|
||||
let rapport;
|
||||
try {
|
||||
rapport = await deepseekCorrectEO(transcript, "EO_T1", 9, null);
|
||||
} catch (err) {
|
||||
console.error(
|
||||
"[T1] DeepSeek correction failed:",
|
||||
err instanceof Error ? err.message : String(err),
|
||||
);
|
||||
try {
|
||||
clientWs.send(
|
||||
JSON.stringify({
|
||||
type: "error",
|
||||
code: "CORRECTION_FAILED",
|
||||
message: "Erreur lors de la correction.",
|
||||
}),
|
||||
);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
clientWs.close(1011, "CORRECTION_FAILED");
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// 3. Appliquer phonologie stub (TD-08) : score textuel /16 + phonologie /4 = /20.
|
||||
const scoreTextuel = rapport.score;
|
||||
const scoreFinal = scoreTextuel + PHONOLOGY_STUB.score;
|
||||
|
||||
// 4. Persister le rapport.
|
||||
const { error: updateError } = await supabase
|
||||
.from("productions")
|
||||
.update({
|
||||
rapport,
|
||||
score: scoreFinal,
|
||||
nclc: rapport.nclc,
|
||||
})
|
||||
.eq("id", productionId);
|
||||
|
||||
if (updateError) {
|
||||
console.error("[T1] production update failed:", updateError.message);
|
||||
}
|
||||
|
||||
// 5. Envoyer le rapport au client puis fermer.
|
||||
try {
|
||||
clientWs.send(
|
||||
JSON.stringify({
|
||||
type: "report",
|
||||
data: {
|
||||
...rapport,
|
||||
score: scoreFinal,
|
||||
simulation_id: productionId,
|
||||
},
|
||||
}),
|
||||
);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
clientWs.close(1000);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
|
||||
export interface CreateT1LiveRoutesOptions {
|
||||
/** Injection pour les tests : fabrique de WebSocket vers Gemini. */
|
||||
clientFactory?: OpenGeminiLiveT1SessionOptions["clientFactory"];
|
||||
/** Injection pour les tests : override timeout/warning. */
|
||||
timeoutMs?: number;
|
||||
warningMs?: number;
|
||||
/** Injection pour les tests : source d'aléa de l'horloge probabiliste. */
|
||||
random?: OpenGeminiLiveT1SessionOptions["random"];
|
||||
}
|
||||
|
||||
/**
|
||||
* Crée le router pour `WS /t1/live`.
|
||||
* - Auth : JWT Supabase en query param `?token=<jwt>` (RÉUTILISE authenticate de
|
||||
* t2live — même permission Premium `oral_t2_live`).
|
||||
* - Contexte : pas de sujet en base. On attend le 1er message
|
||||
* `{type:'context', reponses}` (validé par validateReponses). Absent/invalide
|
||||
* → close 4004 CONTEXT_MISSING.
|
||||
* - OK → openGeminiLiveT1Session → onSessionEnd : correction EO_T1 + persistance.
|
||||
*/
|
||||
export default function createT1LiveRoutes(
|
||||
upgradeWebSocket: UpgradeWebSocket,
|
||||
opts: CreateT1LiveRoutesOptions = {},
|
||||
) {
|
||||
const app = new Hono();
|
||||
|
||||
app.get(
|
||||
"/live",
|
||||
upgradeWebSocket(async (c) => {
|
||||
const token = c.req.query("token");
|
||||
|
||||
let denyCode: number | null = null;
|
||||
let denyReason = "";
|
||||
let profile: Profile | null = null;
|
||||
|
||||
const auth = await authenticate(token);
|
||||
if (!auth.ok) {
|
||||
denyCode = auth.code;
|
||||
denyReason = auth.reason;
|
||||
} else {
|
||||
profile = auth.profile;
|
||||
}
|
||||
|
||||
// Adapter EventEmitter → WebSocketLike pour réutiliser openGeminiLiveT1Session.
|
||||
const adapter = new EventEmitter() as EventEmitter & WebSocketLike;
|
||||
adapter.send = () => {};
|
||||
adapter.close = () => {};
|
||||
|
||||
// La session Gemini ne démarre qu'à réception d'un contexte valide.
|
||||
let started = false;
|
||||
|
||||
return {
|
||||
onOpen(_evt, ws) {
|
||||
adapter.send = (data: unknown) =>
|
||||
ws.send(data as Parameters<typeof ws.send>[0]);
|
||||
adapter.close = (code?: number, reason?: string) =>
|
||||
ws.close(code, reason);
|
||||
|
||||
if (denyCode !== null) {
|
||||
try {
|
||||
ws.send(JSON.stringify({ error: true, code: denyReason }));
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
setTimeout(() => ws.close(denyCode!, denyReason), 100);
|
||||
}
|
||||
},
|
||||
onMessage(evt, ws) {
|
||||
if (denyCode !== null) return;
|
||||
|
||||
// Tant que la session n'est pas démarrée, on attend le contexte.
|
||||
if (!started) {
|
||||
const raw =
|
||||
typeof evt.data === "string"
|
||||
? evt.data
|
||||
: Buffer.isBuffer(evt.data)
|
||||
? evt.data.toString("utf8")
|
||||
: String(evt.data);
|
||||
const ctx = parseT1Context(raw);
|
||||
if (!ctx.ok) {
|
||||
try {
|
||||
ws.send(
|
||||
JSON.stringify({ error: true, code: "CONTEXT_MISSING" }),
|
||||
);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
setTimeout(() => ws.close(4004, "CONTEXT_MISSING"), 100);
|
||||
return;
|
||||
}
|
||||
|
||||
started = true;
|
||||
const profileNonNull = profile!;
|
||||
openGeminiLiveT1Session(adapter, {
|
||||
reponses: ctx.reponses,
|
||||
clientFactory: opts.clientFactory,
|
||||
timeoutMs: opts.timeoutMs,
|
||||
warningMs: opts.warningMs,
|
||||
random: opts.random,
|
||||
onSessionEnd: async (transcript) => {
|
||||
await runT1LiveCorrection({
|
||||
clientWs: adapter,
|
||||
profile: profileNonNull,
|
||||
transcript,
|
||||
});
|
||||
},
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Session démarrée : on relaie les messages (audio / end) à la session.
|
||||
adapter.emit("message", evt.data);
|
||||
},
|
||||
onClose() {
|
||||
if (started) adapter.emit("close");
|
||||
},
|
||||
onError() {
|
||||
if (started) adapter.emit("error", new Error("CLIENT_ERROR"));
|
||||
},
|
||||
};
|
||||
}),
|
||||
);
|
||||
|
||||
return app;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue