Sprint 6d — Migrate Gemini Live to @google/genai SDK

feat(geminiLive): rewrite with the GoogleGenAI SDK (vertexai: true, apiKey);
  replaces the raw WebSocket connection to generativelanguage.googleapis.com
feat(geminiLive): restore full setup config (systemInstruction,
  inputAudioTranscription, outputAudioTranscription, VAD)
fix(geminiLive): buildSetupFrame → SDK config object (no manual JSON)
fix(useT2LiveSession): cancelTokenRef for idempotent startDialogue,
  closeAllRef for stable unmount cleanup
chore: add @google/genai@^1.50.1 dependency
test: 11 geminiLive tests rewritten with SDK mock
  292/292 backend tests green
This commit is contained in:
Hermann_Kitio 2026-04-27 02:25:58 +03:00
parent d89b0b1e89
commit 0662e766d4
6 changed files with 970 additions and 331 deletions

View file

@ -1,5 +1,50 @@
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import { EventEmitter } from "node:events";
// ─── @google/genai SDK mock ──────────────────────────────────────────────────
//
// The callbacks handed to `ai.live.connect` are captured so that tests can
// simulate SDK events (onopen, onmessage, onerror, onclose). The
// `clientFactory` option injected into openGeminiLiveSession lets a stub stand
// in for `new GoogleGenAI(...)`.

/** Event handlers received by the mocked `live.connect`; every one is optional. */
interface CapturedLiveCallbacks {
  onopen?: () => void;
  onmessage?: (msg: unknown) => void;
  onerror?: (err: unknown) => void;
  onclose?: (evt: unknown) => void;
}

/** Spy-backed stand-in for the session object that the mocked `live.connect` resolves to. */
interface CapturedLiveSession {
  sendRealtimeInput: ReturnType<typeof vi.fn>;
  close: ReturnType<typeof vi.fn>;
}

/**
 * Record of a single `live.connect` call made through the fake client:
 * the arguments it received plus the fake session it handed back.
 */
interface CapturedConnect {
  model: string;
  config: Record<string, unknown>;
  callbacks: CapturedLiveCallbacks;
  session: CapturedLiveSession;
}
/**
 * Last `live.connect` call recorded by the fake client. Starts as `null`,
 * is overwritten on every `connect`, and is reset to `null` by the tests.
 */
let capturedConnect: CapturedConnect | null = null;

/**
 * Builds a stub of the SDK client that exposes only `live.connect`.
 *
 * `connect` is a `vi.fn` spy. Each call creates a fake session (with spy
 * `sendRealtimeInput` and `close` methods), stores the received `model`,
 * `config` and `callbacks` together with that session in `capturedConnect`,
 * and resolves to the session.
 *
 * @returns An object shaped like `{ live: { connect } }`.
 */
function makeFakeClient() {
  return {
    live: {
      // Callers supply model/config/callbacks only; `session` is produced
      // here, so it is excluded from the parameter type.
      connect: vi.fn(async (params: Omit<CapturedConnect, "session">) => {
        const session = {
          sendRealtimeInput: vi.fn(),
          close: vi.fn(),
        };
        capturedConnect = {
          model: params.model,
          config: params.config,
          callbacks: params.callbacks,
          session,
        };
        return session;
      }),
    },
  };
}
import {
openGeminiLiveSession,
buildT2SystemPrompt,
@ -30,6 +75,33 @@ const SUJET_OPTS = {
"Vous cherchez un appartement de 2 pièces dans le centre-ville, votre budget est limité et vous souhaitez emménager le mois prochain.",
};
/**
 * Helper: opens a session against a mocked SDK client and returns the capture.
 *
 * Resets `capturedConnect`, calls `openGeminiLiveSession` with `SUJET_OPTS`,
 * a fixed `apiKey` of `"test-key"` and a `clientFactory` that returns the fake
 * client, then waits two microtask ticks before reading the capture.
 *
 * @param client - Client-side socket double passed through to `openGeminiLiveSession`.
 * @param extra - Optional overrides, spread last into the session options.
 * @returns The `CapturedConnect` recorded by the fake client's `live.connect`.
 * @throws Error when no `connect` call was captured after the two ticks.
 */
async function openWithMock(
  client: FakeWs,
  extra: Partial<{
    onSessionEnd: (transcript: string) => void | Promise<void>;
    timeoutMs: number;
    warningMs: number;
  }> = {},
) {
  capturedConnect = null;
  openGeminiLiveSession(client, {
    ...SUJET_OPTS,
    apiKey: "test-key",
    // The directive must sit directly above the `as any` cast to take effect.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    clientFactory: () => makeFakeClient() as any,
    ...extra,
  });
  // Production code awaits `live.connect()` inside a `.then()`, so let the
  // pending microtasks drain before reading the capture.
  await Promise.resolve();
  await Promise.resolve();
  if (!capturedConnect) {
    throw new Error("Le mock du SDK n'a pas capturé de connect()");
  }
  return capturedConnect;
}
describe("buildT2SystemPrompt", () => {
it("substitue role et contexte dans le template", () => {
const prompt = buildT2SystemPrompt(SUJET_OPTS);
@ -42,170 +114,120 @@ describe("buildT2SystemPrompt", () => {
});
});
describe("openGeminiLiveSession", () => {
let originalKey: string | undefined;
describe("openGeminiLiveSession (SDK)", () => {
beforeEach(() => {
originalKey = process.env.GEMINI_API_KEY;
process.env.GEMINI_API_KEY = "test-key";
vi.useFakeTimers();
});
afterEach(() => {
if (originalKey === undefined) {
delete process.env.GEMINI_API_KEY;
} else {
process.env.GEMINI_API_KEY = originalKey;
}
vi.useRealTimers();
vi.restoreAllMocks();
capturedConnect = null;
});
it("envoie le setup frame avec prompt dynamique + VAD + transcriptions", () => {
it("appelle live.connect avec le modèle + config Live (audio + system + transcripts + VAD)", async () => {
const client = new FakeWs();
const gemini = new FakeWs();
const capture = await openWithMock(client);
openGeminiLiveSession(client, {
...SUJET_OPTS,
geminiFactory: () => gemini,
});
gemini.emit("open");
expect(gemini.sent).toHaveLength(1);
const setup = JSON.parse(gemini.sent[0] as string);
expect(setup.setup.model).toMatch(/gemini/);
expect(setup.setup.systemInstruction.parts[0].text).toContain(
expect(capture.model).toMatch(/gemini/);
const config = capture.config;
expect(config.responseModalities).toContain("AUDIO");
expect(config.systemInstruction).toContain(
"un bailleur qui propose un appartement",
);
expect(setup.setup.generationConfig.responseModalities).toContain("AUDIO");
expect(setup.setup.inputAudioTranscription).toEqual({});
expect(setup.setup.outputAudioTranscription).toEqual({});
expect(
setup.setup.realtimeInputConfig.automaticActivityDetection,
).toMatchObject({
disabled: false,
startOfSpeechSensitivity: "START_SENSITIVITY_LOW",
endOfSpeechSensitivity: "END_SENSITIVITY_LOW",
silenceDurationMs: 2000,
expect(config.inputAudioTranscription).toEqual({});
expect(config.outputAudioTranscription).toEqual({});
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const vad: any = (config.realtimeInputConfig as any)
?.automaticActivityDetection;
expect(vad?.disabled).toBe(false);
expect(vad?.silenceDurationMs).toBe(2000);
});
it("forwarde un chunk audio client {type:'audio'} via session.sendRealtimeInput (PCM 16k base64)", async () => {
const client = new FakeWs();
const capture = await openWithMock(client);
capture.callbacks.onopen?.();
const base64 = "AQIDBA=="; // base64 de [1,2,3,4]
client.emit("message", JSON.stringify({ type: "audio", data: base64 }));
expect(capture.session.sendRealtimeInput).toHaveBeenCalledTimes(1);
expect(capture.session.sendRealtimeInput).toHaveBeenCalledWith({
audio: { data: base64, mimeType: "audio/pcm;rate=16000" },
});
});
it("forwarde un chunk audio client (Buffer) vers Gemini", () => {
it("forwarde un message Gemini (audio inlineData) au client en JSON", async () => {
const client = new FakeWs();
const gemini = new FakeWs();
openGeminiLiveSession(client, {
...SUJET_OPTS,
geminiFactory: () => gemini,
});
gemini.emit("open");
const capture = await openWithMock(client);
capture.callbacks.onopen?.();
const audioChunk = Buffer.from([0x01, 0x02, 0x03, 0x04]);
client.emit("message", audioChunk);
const geminiMsg = {
serverContent: {
modelTurn: {
parts: [
{
inlineData: { data: "EAYE", mimeType: "audio/pcm;rate=24000" },
},
],
},
},
};
capture.callbacks.onmessage?.(geminiMsg);
// [0] = setup, [1] = chunk audio
expect(gemini.sent).toHaveLength(2);
expect(gemini.sent[1]).toBe(audioChunk);
});
it("forwarde un chunk audio Gemini (Buffer non-JSON) vers le client sans accumuler de transcript", async () => {
const client = new FakeWs();
const gemini = new FakeWs();
const onSessionEnd = vi.fn();
openGeminiLiveSession(client, {
...SUJET_OPTS,
geminiFactory: () => gemini,
onSessionEnd,
});
gemini.emit("open");
const examinerAudio = Buffer.from([0x10, 0x20, 0x30]);
gemini.emit("message", examinerAudio);
expect(client.sent).toHaveLength(1);
expect(client.sent[0]).toBe(examinerAudio);
// Fin de session via signal client → transcript vide
client.emit("message", JSON.stringify({ type: "end" }));
await vi.runAllTimersAsync();
expect(onSessionEnd).toHaveBeenCalledWith("");
expect(JSON.parse(client.sent[0] as string)).toEqual(geminiMsg);
});
it("accumule inputTranscription et outputTranscription depuis Gemini", async () => {
it("accumule input/outputTranscription et reconstruit le transcript chronologique", async () => {
const client = new FakeWs();
const gemini = new FakeWs();
const onSessionEnd = vi.fn();
openGeminiLiveSession(client, {
...SUJET_OPTS,
geminiFactory: () => gemini,
onSessionEnd,
});
gemini.emit("open");
const capture = await openWithMock(client, { onSessionEnd });
capture.callbacks.onopen?.();
gemini.emit(
"message",
JSON.stringify({
serverContent: {
inputTranscription: { text: "Bonjour, je voudrais louer." },
},
}),
);
gemini.emit(
"message",
JSON.stringify({
serverContent: {
outputTranscription: { text: "Bonjour, cest pour quel quartier ?" },
},
}),
);
gemini.emit(
"message",
JSON.stringify({
serverContent: {
inputTranscription: { text: "Le centre-ville." },
},
}),
);
capture.callbacks.onmessage?.({
serverContent: {
inputTranscription: { text: "Bonjour, je voudrais louer." },
},
});
capture.callbacks.onmessage?.({
serverContent: {
outputTranscription: { text: "Bonjour, cest pour quel quartier ?" },
},
});
capture.callbacks.onmessage?.({
serverContent: { inputTranscription: { text: "Le centre-ville." } },
});
client.emit("message", JSON.stringify({ type: "end" }));
await vi.runAllTimersAsync();
expect(onSessionEnd).toHaveBeenCalledTimes(1);
const transcript = onSessionEnd.mock.calls[0][0] as string;
expect(transcript).toBe(
expect(onSessionEnd.mock.calls[0][0]).toBe(
"Candidat : Bonjour, je voudrais louer.\nExaminateur : Bonjour, cest pour quel quartier ?\nCandidat : Le centre-ville.",
);
});
it("ferme Gemini après onSessionEnd, sans fermer le client (réservé à lappelant)", async () => {
it("ferme la session SDK après onSessionEnd, sans fermer le client", async () => {
const client = new FakeWs();
const gemini = new FakeWs();
const onSessionEnd = vi.fn();
openGeminiLiveSession(client, {
...SUJET_OPTS,
geminiFactory: () => gemini,
onSessionEnd,
});
gemini.emit("open");
const capture = await openWithMock(client, { onSessionEnd });
capture.callbacks.onopen?.();
client.emit("message", JSON.stringify({ type: "end" }));
await vi.runAllTimersAsync();
expect(gemini.closed).toBe(true);
expect(gemini.closeCode).toBe(1000);
expect(capture.session.close).toHaveBeenCalledTimes(1);
expect(client.closed).toBe(false);
});
it("warning à 180 s puis timeout à 210 s déclenche endSession", async () => {
const client = new FakeWs();
const gemini = new FakeWs();
const onSessionEnd = vi.fn();
openGeminiLiveSession(client, {
...SUJET_OPTS,
geminiFactory: () => gemini,
onSessionEnd,
});
gemini.emit("open");
const capture = await openWithMock(client, { onSessionEnd });
capture.callbacks.onopen?.();
// Avancer à 180 s → warning au client
await vi.advanceTimersByTimeAsync(180_000);
const warningFrame = client.sent.find(
(f) => typeof f === "string" && f.includes('"warning"'),
@ -217,22 +239,16 @@ describe("openGeminiLiveSession", () => {
});
expect(onSessionEnd).not.toHaveBeenCalled();
// Avancer à 210 s total → timeout déclenche endSession
await vi.advanceTimersByTimeAsync(30_000);
expect(onSessionEnd).toHaveBeenCalledTimes(1);
expect(gemini.closed).toBe(true);
expect(capture.session.close).toHaveBeenCalled();
});
it("signal end client déclenche endSession une seule fois (idempotent)", async () => {
it("signal end client est idempotent (un seul onSessionEnd)", async () => {
const client = new FakeWs();
const gemini = new FakeWs();
const onSessionEnd = vi.fn();
openGeminiLiveSession(client, {
...SUJET_OPTS,
geminiFactory: () => gemini,
onSessionEnd,
});
gemini.emit("open");
const capture = await openWithMock(client, { onSessionEnd });
capture.callbacks.onopen?.();
client.emit("message", JSON.stringify({ type: "end" }));
client.emit("message", JSON.stringify({ type: "end" }));
@ -241,47 +257,47 @@ describe("openGeminiLiveSession", () => {
expect(onSessionEnd).toHaveBeenCalledTimes(1);
});
it("fermeture Gemini avant fin → close client 4006 GEMINI_DISCONNECTED", () => {
it("onclose SDK avant fin → close client 4006 GEMINI_DISCONNECTED", async () => {
const client = new FakeWs();
const gemini = new FakeWs();
openGeminiLiveSession(client, {
...SUJET_OPTS,
geminiFactory: () => gemini,
});
gemini.emit("open");
const capture = await openWithMock(client);
capture.callbacks.onopen?.();
gemini.emit("close");
capture.callbacks.onclose?.({ code: 1000 });
expect(client.closed).toBe(true);
expect(client.closeCode).toBe(4006);
expect(client.closeReason).toBe("GEMINI_DISCONNECTED");
});
it("erreur Gemini → close client 4006 GEMINI_DISCONNECTED", () => {
it("onerror SDK → close client 4006", async () => {
const client = new FakeWs();
const gemini = new FakeWs();
openGeminiLiveSession(client, {
...SUJET_OPTS,
geminiFactory: () => gemini,
});
gemini.emit("open");
const capture = await openWithMock(client);
capture.callbacks.onopen?.();
gemini.emit("error", new Error("boom"));
capture.callbacks.onerror?.(new Error("boom"));
expect(client.closed).toBe(true);
expect(client.closeCode).toBe(4006);
});
it("absence de GEMINI_API_KEY → close client 4005 GEMINI_CONFIG sans appel à la factory", () => {
it("absence de GEMINI_API_KEY → close client 4005 GEMINI_CONFIG sans appel à live.connect", () => {
const originalKey = process.env.GEMINI_API_KEY;
delete process.env.GEMINI_API_KEY;
capturedConnect = null;
const client = new FakeWs();
const factory = vi.fn(() => new FakeWs());
const factory = vi.fn(() => makeFakeClient());
openGeminiLiveSession(client, { ...SUJET_OPTS, geminiFactory: factory });
openGeminiLiveSession(client, {
...SUJET_OPTS,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
clientFactory: factory as any,
});
expect(factory).not.toHaveBeenCalled();
expect(client.closed).toBe(true);
expect(client.closeCode).toBe(4005);
expect(client.closeReason).toBe("GEMINI_CONFIG");
if (originalKey !== undefined) process.env.GEMINI_API_KEY = originalKey;
});
});