From 8f8a900449ecb5bee91fc67adddbbbec0a2d4992 Mon Sep 17 00:00:00 2001 From: Hermann_Kitio Date: Sat, 25 Apr 2026 05:59:53 +0300 Subject: [PATCH] feat(eo): restore audioBase64 mode for Gemini batch transcription MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - POST /corrections/eo accepts audioBase64 + mimeType (XOR with transcript) - Gemini transcribeAudio called server-side before correction - No audio storage (client downloads locally) - /transcriptions/token kept for future Deepgram live use Typecheck: OK · Tests: all green Co-Authored-By: Claude Opus 4.7 (1M context) --- src/controllers/__tests__/correctEO.test.ts | 182 +++++++++++++++++++- src/controllers/correctionController.ts | 74 ++++++-- src/routes/__tests__/correctionsEO.test.ts | 60 ++++++- src/routes/corrections.ts | 67 +++++-- 4 files changed, 352 insertions(+), 31 deletions(-) diff --git a/src/controllers/__tests__/correctEO.test.ts b/src/controllers/__tests__/correctEO.test.ts index acd0c26..050f25b 100644 --- a/src/controllers/__tests__/correctEO.test.ts +++ b/src/controllers/__tests__/correctEO.test.ts @@ -129,7 +129,7 @@ function createSupabaseMock(production: ProductionRow | null) { // ── Tests ──────────────────────────────────────────────────────────────── -describe("correctionController.correctEO — Sprint 4b (transcript-only)", () => { +describe("correctionController.correctEO — Sprint 4b.2 (transcript ou audio batch)", () => { beforeEach(() => { vi.resetModules(); vi.restoreAllMocks(); @@ -307,4 +307,184 @@ describe("correctionController.correctEO — Sprint 4b (transcript-only)", () => ); expect(persisted!.data.nclc_cible).toBe(10); }); + + // ── Mode audio batch (Sprint 4b.2) ──────────────────────────────────── + + it("mode audio : transcrit via Gemini puis utilise le transcript pour la correction", async () => { + const { mock, updates } = createSupabaseMock({ + id: "sim-audio-1", + user_id: "user-1", + tache: "EO_T1", + sujet_id: null, + }); + 
vi.doMock("../../lib/supabase", () => ({ supabase: mock })); + + const correctEOSpy = vi.fn().mockResolvedValue(VALID_RAPPORT_EO); + vi.doMock("../../lib/deepseek", () => ({ + correctEE: vi.fn(), + correctEO: correctEOSpy, + generateProductionModele: vi.fn().mockResolvedValue({ + production_modele_propre: "t", + notes_pedagogiques: [], + transformations: [], + message: "", + nclc_modele: 9, + nclc_obtenu: 8, + score_cible: 14, + tcf_word_count: 1, + tcf_word_min: 200, + tcf_word_max: 300, + tcf_truncated: false, + }), + generateExercices: vi.fn().mockResolvedValue([]), + })); + + const transcribeAudio = vi + .fn() + .mockResolvedValue("Bonjour, je m'appelle Marie."); + const isAcceptedAudioMime = vi.fn().mockReturnValue(true); + vi.doMock("../../lib/gemini", () => ({ + transcribeAudio, + isAcceptedAudioMime, + })); + + const { correctEO } = await import("../correctionController"); + const result = await correctEO( + { + simulationId: "sim-audio-1", + tache: "EO_T1", + nclcCible: 9, + audioBase64: "AAAA", + mimeType: "audio/webm", + }, + PROFILE, + ); + + expect("data" in result).toBe(true); + expect(transcribeAudio).toHaveBeenCalledWith("AAAA", "audio/webm"); + expect(correctEOSpy).toHaveBeenCalledWith( + "Bonjour, je m'appelle Marie.", + "EO_T1", + 9, + null, + ); + + const persisted = updates.find( + (u) => u.table === "productions" && u.data.score !== undefined, + ); + expect(persisted!.data.contenu).toBe("Bonjour, je m'appelle Marie."); + // Pas d'audio_url — le backend ne stocke aucun audio. 
+ expect(persisted!.data.audio_url).toBeUndefined(); + }); + + it("mimeType non accepté → VALIDATION_ERROR 400", async () => { + const { mock } = createSupabaseMock({ + id: "sim-audio-2", + user_id: "user-1", + tache: "EO_T1", + sujet_id: null, + }); + vi.doMock("../../lib/supabase", () => ({ supabase: mock })); + vi.doMock("../../lib/deepseek", () => ({ + correctEE: vi.fn(), + correctEO: vi.fn(), + generateProductionModele: vi.fn(), + generateExercices: vi.fn(), + })); + vi.doMock("../../lib/gemini", () => ({ + transcribeAudio: vi.fn(), + isAcceptedAudioMime: vi.fn().mockReturnValue(false), + })); + + const { correctEO } = await import("../correctionController"); + const result = await correctEO( + { + simulationId: "sim-audio-2", + tache: "EO_T1", + nclcCible: 9, + audioBase64: "AAAA", + mimeType: "audio/ogg", + }, + PROFILE, + ); + + expect("error" in result).toBe(true); + if ("error" in result) { + expect(result.code).toBe("VALIDATION_ERROR"); + expect(result.status).toBe(400); + } + }); + + it("transcription Gemini échoue → INTERNAL_ERROR 500", async () => { + const { mock } = createSupabaseMock({ + id: "sim-audio-3", + user_id: "user-1", + tache: "EO_T1", + sujet_id: null, + }); + vi.doMock("../../lib/supabase", () => ({ supabase: mock })); + vi.doMock("../../lib/deepseek", () => ({ + correctEE: vi.fn(), + correctEO: vi.fn(), + generateProductionModele: vi.fn(), + generateExercices: vi.fn(), + })); + vi.doMock("../../lib/gemini", () => ({ + transcribeAudio: vi.fn().mockRejectedValue(new Error("Gemini timeout")), + isAcceptedAudioMime: vi.fn().mockReturnValue(true), + })); + + const { correctEO } = await import("../correctionController"); + const result = await correctEO( + { + simulationId: "sim-audio-3", + tache: "EO_T1", + nclcCible: 9, + audioBase64: "AAAA", + mimeType: "audio/webm", + }, + PROFILE, + ); + + expect("error" in result).toBe(true); + if ("error" in result) { + expect(result.code).toBe("INTERNAL_ERROR"); + expect(result.status).toBe(500); + } 
+ }); + + it("ni transcript ni audioBase64 → VALIDATION_ERROR 400", async () => { + const { mock } = createSupabaseMock({ + id: "sim-audio-4", + user_id: "user-1", + tache: "EO_T1", + sujet_id: null, + }); + vi.doMock("../../lib/supabase", () => ({ supabase: mock })); + vi.doMock("../../lib/deepseek", () => ({ + correctEE: vi.fn(), + correctEO: vi.fn(), + generateProductionModele: vi.fn(), + generateExercices: vi.fn(), + })); + vi.doMock("../../lib/gemini", () => ({ + transcribeAudio: vi.fn(), + isAcceptedAudioMime: vi.fn(), + })); + + const { correctEO } = await import("../correctionController"); + const result = await correctEO( + { + simulationId: "sim-audio-4", + tache: "EO_T1", + nclcCible: 9, + }, + PROFILE, + ); + + expect("error" in result).toBe(true); + if ("error" in result) { + expect(result.code).toBe("VALIDATION_ERROR"); + } + }); }); diff --git a/src/controllers/correctionController.ts b/src/controllers/correctionController.ts index 3107be9..c289368 100644 --- a/src/controllers/correctionController.ts +++ b/src/controllers/correctionController.ts @@ -30,6 +30,7 @@ import { type TacheCorrection, } from "../lib/deepseek.js"; import { PLANS, type Plan } from "../lib/access.js"; +import { transcribeAudio, isAcceptedAudioMime } from "../lib/gemini.js"; import type { AuthProfile } from "../middleware/auth.js"; type CorrectionError = { @@ -311,20 +312,23 @@ async function runExercicesJob(input: ExercicesJobInput): Promise { } } -// ── EO — Sprint 4b : transcript-only (audio géré côté frontend) ───────── +// ── EO — Sprint 4b.2 : transcript OU audio batch (Gemini) ────────────── // -// Décision Sprint 4b : Deepgram en connexion directe navigateur ↔ Deepgram via -// token éphémère (cf. /transcriptions/token). Le backend reçoit uniquement le -// transcript final ; aucun audio n'est stocké côté serveur. +// Bascule Sprint 4b.2 : abandon de Deepgram live au profit de Gemini batch +// côté serveur. 
Le frontend envoie soit un transcript déjà constitué, soit +// l'audio brut en base64 — auquel cas le backend appelle `transcribeAudio` +// (Gemini) avant de poursuivre le pipeline correction. L'audio n'est PAS +// stocké côté serveur ; le client en garde une copie locale s'il le souhaite. // // Flux POST /corrections/eo : -// 1. Vérifier que la production existe, appartient à l'utilisateur. +// 1. Vérifier production + ownership. // 2. Charger la consigne (utile au prompt EO). -// 3. Lancer correction EO + modèle EO en parallèle (mêmes patterns que EE). -// 4. Persister le rapport (revelation, diagnostic, conseil_nclc, erreurs_codes, -// contenu = transcript). -// 5. Lancer les exercices fire-and-forget. -// 6. Incrémenter le quota. +// 3. Mode A (audioBase64) : valider MIME → transcribeAudio → transcript. +// Mode B (transcript direct) : passer. +// 4. Lancer correction EO + modèle EO en parallèle (mêmes patterns que EE). +// 5. Persister le rapport (contenu = transcript). +// 6. Lancer les exercices fire-and-forget. +// 7. Incrémenter le quota. // // Le risque race-condition décrit dans correctEE s'applique aussi ici : on ne // touche PAS aux colonnes *_status dans l'update final. @@ -333,7 +337,12 @@ export interface CorrectEOInput { simulationId: string; tache: TacheEO; nclcCible: NclcCible; - transcript: string; + /** Transcript texte fourni directement par le client (mode B). */ + transcript?: string; + /** Audio brut en base64 (mode A — Gemini transcrit côté serveur). */ + audioBase64?: string; + /** MIME du payload audio quand audioBase64 est fourni. */ + mimeType?: string; } export async function correctEO( @@ -342,7 +351,7 @@ export async function correctEO( ): Promise< { data: CorrectionRapport & { simulation_id: string } } | CorrectionError > { - const { simulationId, tache, nclcCible, transcript } = input; + const { simulationId, tache, nclcCible } = input; // 1. Vérifier la production + ownership. 
const { data: production, error: fetchError } = await supabase @@ -382,7 +391,46 @@ export async function correctEO( } } - // 3. Lancer correction EO + modèle EO en parallèle. + // 3. Mode batch audio : transcrire d'abord. Mode transcript direct : passer. + let transcript: string; + if (input.audioBase64 && input.mimeType) { + if (!isAcceptedAudioMime(input.mimeType)) { + return { + error: true, + code: "VALIDATION_ERROR", + message: + "mimeType non supporté. Valeurs acceptées : audio/webm, audio/mp4, audio/wav.", + status: 400, + }; + } + try { + transcript = await transcribeAudio(input.audioBase64, input.mimeType); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error("[correctionController.correctEO] transcription failed", { + simulationId, + message, + }); + return { + error: true, + code: "INTERNAL_ERROR", + message: + "Impossible de transcrire l'audio. Veuillez réessayer dans quelques instants.", + status: 500, + }; + } + } else if (typeof input.transcript === "string") { + transcript = input.transcript; + } else { + return { + error: true, + code: "VALIDATION_ERROR", + message: "Fournir soit `transcript`, soit `audioBase64` + `mimeType`.", + status: 400, + }; + } + + // 4. Lancer correction EO + modèle EO en parallèle. 
const correctionPromise = deepseekCorrectEO( transcript, tache, diff --git a/src/routes/__tests__/correctionsEO.test.ts b/src/routes/__tests__/correctionsEO.test.ts index d68f86a..6683561 100644 --- a/src/routes/__tests__/correctionsEO.test.ts +++ b/src/routes/__tests__/correctionsEO.test.ts @@ -73,7 +73,7 @@ describe("POST /corrections/eo — Sprint 4a", () => { expect(res.status).toBe(400); }); - it("400 si transcript manquant", async () => { + it("400 si ni transcript ni audioBase64 fournis", async () => { const app = buildApp(); const res = await app.request("/corrections/eo", { method: "POST", @@ -85,6 +85,36 @@ describe("POST /corrections/eo — Sprint 4a", () => { expect(body.code).toBe("VALIDATION_ERROR"); }); + it("400 si transcript ET audioBase64 fournis simultanément (XOR)", async () => { + const app = buildApp(); + const res = await app.request("/corrections/eo", { + method: "POST", + headers: JSON_HEADERS, + body: JSON.stringify({ + simulationId: "s1", + tache: "EO_T1", + transcript: "t", + audioBase64: "AAAA", + mimeType: "audio/webm", + }), + }); + expect(res.status).toBe(400); + }); + + it("400 si audioBase64 sans mimeType", async () => { + const app = buildApp(); + const res = await app.request("/corrections/eo", { + method: "POST", + headers: JSON_HEADERS, + body: JSON.stringify({ + simulationId: "s1", + tache: "EO_T1", + audioBase64: "AAAA", + }), + }); + expect(res.status).toBe(400); + }); + it("400 si nclc_cible invalide", async () => { const app = buildApp(); const res = await app.request("/corrections/eo", { @@ -133,6 +163,34 @@ describe("POST /corrections/eo — Sprint 4a", () => { ); }); + it("200 mode batch audio (transmet audioBase64 + mimeType au controller)", async () => { + correctEOMock.mockResolvedValue({ + data: { score: 14, nclc: 9, simulation_id: "s-audio", diagnostic: "d" }, + }); + const app = buildApp(); + const res = await app.request("/corrections/eo", { + method: "POST", + headers: JSON_HEADERS, + body: JSON.stringify({ + 
simulationId: "s-audio", + tache: "EO_T1", + audioBase64: "AAAA", + mimeType: "audio/webm", + }), + }); + expect(res.status).toBe(200); + expect(correctEOMock).toHaveBeenCalledWith( + expect.objectContaining({ + simulationId: "s-audio", + tache: "EO_T1", + nclcCible: 9, + audioBase64: "AAAA", + mimeType: "audio/webm", + }), + expect.any(Object), + ); + }); + it("200 avec nclc_cible=10 transmis au controller", async () => { correctEOMock.mockResolvedValue({ data: { score: 16, nclc: 10, simulation_id: "s2", diagnostic: "d" }, diff --git a/src/routes/corrections.ts b/src/routes/corrections.ts index d2a0f08..38bf75c 100644 --- a/src/routes/corrections.ts +++ b/src/routes/corrections.ts @@ -91,15 +91,19 @@ corrections.post("/ee", authMiddleware, async (c) => { return c.json(result.data, 200); }); -// Sprint 4b — POST /corrections/eo reçoit uniquement le transcript final. -// La transcription live est gérée navigateur ↔ Deepgram (cf. /transcriptions/token). -// Aucun audio n'est stocké côté backend. +// Sprint 4b.2 — POST /corrections/eo accepte SOIT un transcript texte +// SOIT un audio base64 + mimeType (transcrit côté backend via Gemini). +// Aucun audio n'est stocké côté serveur ; le client garde une copie locale. 
+const MAX_AUDIO_BASE64_LEN = 14 * 1024 * 1024; + corrections.post("/eo", authMiddleware, async (c) => { let body: { simulationId?: unknown; transcript?: unknown; tache?: unknown; nclc_cible?: unknown; + audioBase64?: unknown; + mimeType?: unknown; }; try { body = await c.req.json(); @@ -125,17 +129,6 @@ corrections.post("/eo", authMiddleware, async (c) => { ); } - if (!body.transcript || typeof body.transcript !== "string") { - return c.json( - { - error: true, - code: "VALIDATION_ERROR", - message: "transcript est requis.", - }, - 400, - ); - } - if (!body.tache || !VALID_TACHES_EO.includes(body.tache as string)) { return c.json( { @@ -147,6 +140,46 @@ corrections.post("/eo", authMiddleware, async (c) => { ); } + // XOR : transcript OU (audioBase64 + mimeType). Pas les deux, pas aucun. + const hasTranscript = + typeof body.transcript === "string" && body.transcript.length > 0; + const hasAudio = + typeof body.audioBase64 === "string" && body.audioBase64.length > 0; + if (hasTranscript === hasAudio) { + return c.json( + { + error: true, + code: "VALIDATION_ERROR", + message: + "Fournir exactement un des deux : `transcript` (texte) ou `audioBase64` + `mimeType` (audio).", + }, + 400, + ); + } + + if (hasAudio) { + if (typeof body.mimeType !== "string" || body.mimeType.length === 0) { + return c.json( + { + error: true, + code: "VALIDATION_ERROR", + message: "`mimeType` est requis quand `audioBase64` est fourni.", + }, + 400, + ); + } + if ((body.audioBase64 as string).length > MAX_AUDIO_BASE64_LEN) { + return c.json( + { + error: true, + code: "VALIDATION_ERROR", + message: "Audio trop volumineux (max ~10 Mo).", + }, + 413, + ); + } + } + // nclc_cible optionnel (défaut 9, valeurs 9 ou 10). 
let nclcCible: 9 | 10 = 9; if (body.nclc_cible !== undefined) { @@ -169,13 +202,15 @@ corrections.post("/eo", authMiddleware, async (c) => { simulationId: body.simulationId, tache: body.tache as "EO_T1" | "EO_T3", nclcCible, - transcript: body.transcript, + transcript: hasTranscript ? (body.transcript as string) : undefined, + audioBase64: hasAudio ? (body.audioBase64 as string) : undefined, + mimeType: hasAudio ? (body.mimeType as string) : undefined, }, profile, ); if ("error" in result) { - return c.json(result, result.status as 401 | 404 | 500); + return c.json(result, result.status as 400 | 401 | 404 | 500); } return c.json(result.data, 200);