feat(eo): restore audioBase64 mode for Gemini batch transcription

- POST /corrections/eo accepts audioBase64 + mimeType (XOR with transcript)
- Gemini transcribeAudio called server-side before correction
- No audio storage (client downloads locally)
- /transcriptions/token kept for future Deepgram live use

Typecheck: OK · Tests: all green

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 05:59:53 +03:00 · 2026-04-25 05:59:53 +03:00 · 8f8a900449
commit 8f8a900449
parent 14880fe94c
4 changed files with 352 additions and 31 deletions
--- a/src/controllers/tests/correctEO.test.ts
+++ b/src/controllers/tests/correctEO.test.ts
@ -129,7 +129,7 @@ function createSupabaseMock(production: ProductionRow | null) {

 // ── Tests ────────────────────────────────────────────────────────────────

-describe("correctionController.correctEO — Sprint 4b (transcript-only)", () => {
+describe("correctionController.correctEO — Sprint 4b.2 (transcript ou audio batch)", () => {
  beforeEach(() => {
    vi.resetModules();
    vi.restoreAllMocks();
@ -307,4 +307,184 @@ describe("correctionController.correctEO — Sprint 4b (transcript-only)", () =>
    );
    expect(persisted!.data.nclc_cible).toBe(10);
  });
+
+  // ── Mode audio batch (Sprint 4b.2) ────────────────────────────────────
+
+  it("mode audio : transcrit via Gemini puis utilise le transcript pour la correction", async () => {
+    const { mock, updates } = createSupabaseMock({
+      id: "sim-audio-1",
+      user_id: "user-1",
+      tache: "EO_T1",
+      sujet_id: null,
+    });
+    vi.doMock("../../lib/supabase", () => ({ supabase: mock }));
+
+    const correctEOSpy = vi.fn().mockResolvedValue(VALID_RAPPORT_EO);
+    vi.doMock("../../lib/deepseek", () => ({
+      correctEE: vi.fn(),
+      correctEO: correctEOSpy,
+      generateProductionModele: vi.fn().mockResolvedValue({
+        production_modele_propre: "t",
+        notes_pedagogiques: [],
+        transformations: [],
+        message: "",
+        nclc_modele: 9,
+        nclc_obtenu: 8,
+        score_cible: 14,
+        tcf_word_count: 1,
+        tcf_word_min: 200,
+        tcf_word_max: 300,
+        tcf_truncated: false,
+      }),
+      generateExercices: vi.fn().mockResolvedValue([]),
+    }));
+
+    const transcribeAudio = vi
+      .fn()
+      .mockResolvedValue("Bonjour, je m'appelle Marie.");
+    const isAcceptedAudioMime = vi.fn().mockReturnValue(true);
+    vi.doMock("../../lib/gemini", () => ({
+      transcribeAudio,
+      isAcceptedAudioMime,
+    }));
+
+    const { correctEO } = await import("../correctionController");
+    const result = await correctEO(
+      {
+        simulationId: "sim-audio-1",
+        tache: "EO_T1",
+        nclcCible: 9,
+        audioBase64: "AAAA",
+        mimeType: "audio/webm",
+      },
+      PROFILE,
+    );
+
+    expect("data" in result).toBe(true);
+    expect(transcribeAudio).toHaveBeenCalledWith("AAAA", "audio/webm");
+    expect(correctEOSpy).toHaveBeenCalledWith(
+      "Bonjour, je m'appelle Marie.",
+      "EO_T1",
+      9,
+      null,
+    );
+
+    const persisted = updates.find(
+      (u) => u.table === "productions" && u.data.score !== undefined,
+    );
+    expect(persisted!.data.contenu).toBe("Bonjour, je m'appelle Marie.");
+    // Pas d'audio_url — le backend ne stocke aucun audio.
+    expect(persisted!.data.audio_url).toBeUndefined();
+  });
+
+  it("mimeType non accepté → VALIDATION_ERROR 400", async () => {
+    const { mock } = createSupabaseMock({
+      id: "sim-audio-2",
+      user_id: "user-1",
+      tache: "EO_T1",
+      sujet_id: null,
+    });
+    vi.doMock("../../lib/supabase", () => ({ supabase: mock }));
+    vi.doMock("../../lib/deepseek", () => ({
+      correctEE: vi.fn(),
+      correctEO: vi.fn(),
+      generateProductionModele: vi.fn(),
+      generateExercices: vi.fn(),
+    }));
+    vi.doMock("../../lib/gemini", () => ({
+      transcribeAudio: vi.fn(),
+      isAcceptedAudioMime: vi.fn().mockReturnValue(false),
+    }));
+
+    const { correctEO } = await import("../correctionController");
+    const result = await correctEO(
+      {
+        simulationId: "sim-audio-2",
+        tache: "EO_T1",
+        nclcCible: 9,
+        audioBase64: "AAAA",
+        mimeType: "audio/ogg",
+      },
+      PROFILE,
+    );
+
+    expect("error" in result).toBe(true);
+    if ("error" in result) {
+      expect(result.code).toBe("VALIDATION_ERROR");
+      expect(result.status).toBe(400);
+    }
+  });
+
+  it("transcription Gemini échoue → INTERNAL_ERROR 500", async () => {
+    const { mock } = createSupabaseMock({
+      id: "sim-audio-3",
+      user_id: "user-1",
+      tache: "EO_T1",
+      sujet_id: null,
+    });
+    vi.doMock("../../lib/supabase", () => ({ supabase: mock }));
+    vi.doMock("../../lib/deepseek", () => ({
+      correctEE: vi.fn(),
+      correctEO: vi.fn(),
+      generateProductionModele: vi.fn(),
+      generateExercices: vi.fn(),
+    }));
+    vi.doMock("../../lib/gemini", () => ({
+      transcribeAudio: vi.fn().mockRejectedValue(new Error("Gemini timeout")),
+      isAcceptedAudioMime: vi.fn().mockReturnValue(true),
+    }));
+
+    const { correctEO } = await import("../correctionController");
+    const result = await correctEO(
+      {
+        simulationId: "sim-audio-3",
+        tache: "EO_T1",
+        nclcCible: 9,
+        audioBase64: "AAAA",
+        mimeType: "audio/webm",
+      },
+      PROFILE,
+    );
+
+    expect("error" in result).toBe(true);
+    if ("error" in result) {
+      expect(result.code).toBe("INTERNAL_ERROR");
+      expect(result.status).toBe(500);
+    }
+  });
+
+  it("ni transcript ni audioBase64 → VALIDATION_ERROR 400", async () => {
+    const { mock } = createSupabaseMock({
+      id: "sim-audio-4",
+      user_id: "user-1",
+      tache: "EO_T1",
+      sujet_id: null,
+    });
+    vi.doMock("../../lib/supabase", () => ({ supabase: mock }));
+    vi.doMock("../../lib/deepseek", () => ({
+      correctEE: vi.fn(),
+      correctEO: vi.fn(),
+      generateProductionModele: vi.fn(),
+      generateExercices: vi.fn(),
+    }));
+    vi.doMock("../../lib/gemini", () => ({
+      transcribeAudio: vi.fn(),
+      isAcceptedAudioMime: vi.fn(),
+    }));
+
+    const { correctEO } = await import("../correctionController");
+    const result = await correctEO(
+      {
+        simulationId: "sim-audio-4",
+        tache: "EO_T1",
+        nclcCible: 9,
+      },
+      PROFILE,
+    );
+
+    expect("error" in result).toBe(true);
+    if ("error" in result) {
+      expect(result.code).toBe("VALIDATION_ERROR");
+    }
+  });
 });
--- a/src/controllers/correctionController.ts
+++ b/src/controllers/correctionController.ts
@ -30,6 +30,7 @@ import {
  type TacheCorrection,
 } from "../lib/deepseek.js";
 import { PLANS, type Plan } from "../lib/access.js";
+import { transcribeAudio, isAcceptedAudioMime } from "../lib/gemini.js";
 import type { AuthProfile } from "../middleware/auth.js";

 type CorrectionError = {
@ -311,20 +312,23 @@ async function runExercicesJob(input: ExercicesJobInput): Promise<void> {
  }
 }

-// ── EO — Sprint 4b : transcript-only (audio géré côté frontend) ─────────
+// ── EO — Sprint 4b.2 : transcript OU audio batch (Gemini) ──────────────
 //
-// Décision Sprint 4b : Deepgram en connexion directe navigateur ↔ Deepgram via
-// token éphémère (cf. /transcriptions/token). Le backend reçoit uniquement le
-// transcript final ; aucun audio n'est stocké côté serveur.
+// Bascule Sprint 4b.2 : abandon de Deepgram live au profit de Gemini batch
+// côté serveur. Le frontend envoie soit un transcript déjà constitué, soit
+// l'audio brut en base64 — auquel cas le backend appelle `transcribeAudio`
+// (Gemini) avant de poursuivre le pipeline correction. L'audio n'est PAS
+// stocké côté serveur ; le client en garde une copie locale s'il le souhaite.
 //
 // Flux POST /corrections/eo :
-//   1. Vérifier que la production existe, appartient à l'utilisateur.
+//   1. Vérifier production + ownership.
 //   2. Charger la consigne (utile au prompt EO).
-//   3. Lancer correction EO + modèle EO en parallèle (mêmes patterns que EE).
-//   4. Persister le rapport (revelation, diagnostic, conseil_nclc, erreurs_codes,
-//      contenu = transcript).
-//   5. Lancer les exercices fire-and-forget.
-//   6. Incrémenter le quota.
+//   3. Mode A (transcript direct) : passer.
+//      Mode B (audioBase64) : valider MIME → transcribeAudio → transcript.
+//   4. Lancer correction EO + modèle EO en parallèle (mêmes patterns que EE).
+//   5. Persister le rapport (contenu = transcript).
+//   6. Lancer les exercices fire-and-forget.
+//   7. Incrémenter le quota.
 //
 // Le risque race-condition décrit dans correctEE s'applique aussi ici : on ne
 // touche PAS aux colonnes *_status dans l'update final.
@ -333,7 +337,12 @@ export interface CorrectEOInput {
  simulationId: string;
  tache: TacheEO;
  nclcCible: NclcCible;
-  transcript: string;
+  /** Transcript texte fourni directement par le client (mode A). */
+  transcript?: string;
+  /** Audio brut en base64 (mode B — Gemini transcrit côté serveur). */
+  audioBase64?: string;
+  /** MIME du payload audio quand audioBase64 est fourni. */
+  mimeType?: string;
 }

 export async function correctEO(
@ -342,7 +351,7 @@ export async function correctEO(
 ): Promise<
  { data: CorrectionRapport & { simulation_id: string } } | CorrectionError
 > {
-  const { simulationId, tache, nclcCible, transcript } = input;
+  const { simulationId, tache, nclcCible } = input;

  // 1. Vérifier la production + ownership.
  const { data: production, error: fetchError } = await supabase
@ -382,7 +391,46 @@ export async function correctEO(
    }
  }

-  // 3. Lancer correction EO + modèle EO en parallèle.
+  // 3. Mode batch audio : transcrire d'abord. Mode transcript direct : passer.
+  let transcript: string;
+  if (input.audioBase64 && input.mimeType) {
+    if (!isAcceptedAudioMime(input.mimeType)) {
+      return {
+        error: true,
+        code: "VALIDATION_ERROR",
+        message:
+          "mimeType non supporté. Valeurs acceptées : audio/webm, audio/mp4, audio/wav.",
+        status: 400,
+      };
+    }
+    try {
+      transcript = await transcribeAudio(input.audioBase64, input.mimeType);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      console.error("[correctionController.correctEO] transcription failed", {
+        simulationId,
+        message,
+      });
+      return {
+        error: true,
+        code: "INTERNAL_ERROR",
+        message:
+          "Impossible de transcrire l'audio. Veuillez réessayer dans quelques instants.",
+        status: 500,
+      };
+    }
+  } else if (typeof input.transcript === "string") {
+    transcript = input.transcript;
+  } else {
+    return {
+      error: true,
+      code: "VALIDATION_ERROR",
+      message: "Fournir soit `transcript`, soit `audioBase64` + `mimeType`.",
+      status: 400,
+    };
+  }
+
+  // 4. Lancer correction EO + modèle EO en parallèle.
  const correctionPromise = deepseekCorrectEO(
    transcript,
    tache,
--- a/src/routes/tests/correctionsEO.test.ts
+++ b/src/routes/tests/correctionsEO.test.ts
@ -73,7 +73,7 @@ describe("POST /corrections/eo — Sprint 4a", () => {
    expect(res.status).toBe(400);
  });

-  it("400 si transcript manquant", async () => {
+  it("400 si ni transcript ni audioBase64 fournis", async () => {
    const app = buildApp();
    const res = await app.request("/corrections/eo", {
      method: "POST",
@ -85,6 +85,36 @@ describe("POST /corrections/eo — Sprint 4a", () => {
    expect(body.code).toBe("VALIDATION_ERROR");
  });

+  it("400 si transcript ET audioBase64 fournis simultanément (XOR)", async () => {
+    const app = buildApp();
+    const res = await app.request("/corrections/eo", {
+      method: "POST",
+      headers: JSON_HEADERS,
+      body: JSON.stringify({
+        simulationId: "s1",
+        tache: "EO_T1",
+        transcript: "t",
+        audioBase64: "AAAA",
+        mimeType: "audio/webm",
+      }),
+    });
+    expect(res.status).toBe(400);
+  });
+
+  it("400 si audioBase64 sans mimeType", async () => {
+    const app = buildApp();
+    const res = await app.request("/corrections/eo", {
+      method: "POST",
+      headers: JSON_HEADERS,
+      body: JSON.stringify({
+        simulationId: "s1",
+        tache: "EO_T1",
+        audioBase64: "AAAA",
+      }),
+    });
+    expect(res.status).toBe(400);
+  });
+
  it("400 si nclc_cible invalide", async () => {
    const app = buildApp();
    const res = await app.request("/corrections/eo", {
@ -133,6 +163,34 @@ describe("POST /corrections/eo — Sprint 4a", () => {
    );
  });

+  it("200 mode batch audio (transmet audioBase64 + mimeType au controller)", async () => {
+    correctEOMock.mockResolvedValue({
+      data: { score: 14, nclc: 9, simulation_id: "s-audio", diagnostic: "d" },
+    });
+    const app = buildApp();
+    const res = await app.request("/corrections/eo", {
+      method: "POST",
+      headers: JSON_HEADERS,
+      body: JSON.stringify({
+        simulationId: "s-audio",
+        tache: "EO_T1",
+        audioBase64: "AAAA",
+        mimeType: "audio/webm",
+      }),
+    });
+    expect(res.status).toBe(200);
+    expect(correctEOMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        simulationId: "s-audio",
+        tache: "EO_T1",
+        nclcCible: 9,
+        audioBase64: "AAAA",
+        mimeType: "audio/webm",
+      }),
+      expect.any(Object),
+    );
+  });
+
  it("200 avec nclc_cible=10 transmis au controller", async () => {
    correctEOMock.mockResolvedValue({
      data: { score: 16, nclc: 10, simulation_id: "s2", diagnostic: "d" },
--- a/src/routes/corrections.ts
+++ b/src/routes/corrections.ts
@ -91,15 +91,19 @@ corrections.post("/ee", authMiddleware, async (c) => {
  return c.json(result.data, 200);
 });

-// Sprint 4b — POST /corrections/eo reçoit uniquement le transcript final.
-// La transcription live est gérée navigateur ↔ Deepgram (cf. /transcriptions/token).
-// Aucun audio n'est stocké côté backend.
+// Sprint 4b.2 — POST /corrections/eo accepte SOIT un transcript texte
+// SOIT un audio base64 + mimeType (transcrit côté backend via Gemini).
+// Aucun audio n'est stocké côté serveur ; le client garde une copie locale.
+const MAX_AUDIO_BASE64_LEN = 14 * 1024 * 1024;
+
 corrections.post("/eo", authMiddleware, async (c) => {
  let body: {
    simulationId?: unknown;
    transcript?: unknown;
    tache?: unknown;
    nclc_cible?: unknown;
+    audioBase64?: unknown;
+    mimeType?: unknown;
  };
  try {
    body = await c.req.json();
@ -125,17 +129,6 @@ corrections.post("/eo", authMiddleware, async (c) => {
    );
  }

-  if (!body.transcript || typeof body.transcript !== "string") {
-    return c.json(
-      {
-        error: true,
-        code: "VALIDATION_ERROR",
-        message: "transcript est requis.",
-      },
-      400,
-    );
-  }
-
  if (!body.tache || !VALID_TACHES_EO.includes(body.tache as string)) {
    return c.json(
      {
@ -147,6 +140,46 @@ corrections.post("/eo", authMiddleware, async (c) => {
    );
  }

+  // XOR : transcript OU (audioBase64 + mimeType). Ni les deux, ni aucun.
+  const hasTranscript =
+    typeof body.transcript === "string" && body.transcript.length > 0;
+  const hasAudio =
+    typeof body.audioBase64 === "string" && body.audioBase64.length > 0;
+  if (hasTranscript === hasAudio) {
+    return c.json(
+      {
+        error: true,
+        code: "VALIDATION_ERROR",
+        message:
+          "Fournir exactement un des deux : `transcript` (texte) ou `audioBase64` + `mimeType` (audio).",
+      },
+      400,
+    );
+  }
+
+  if (hasAudio) {
+    if (typeof body.mimeType !== "string" || body.mimeType.length === 0) {
+      return c.json(
+        {
+          error: true,
+          code: "VALIDATION_ERROR",
+          message: "`mimeType` est requis quand `audioBase64` est fourni.",
+        },
+        400,
+      );
+    }
+    if ((body.audioBase64 as string).length > MAX_AUDIO_BASE64_LEN) {
+      return c.json(
+        {
+          error: true,
+          code: "VALIDATION_ERROR",
+          message: "Audio trop volumineux (max ~10 Mo).",
+        },
+        413,
+      );
+    }
+  }
+
  // nclc_cible optionnel (défaut 9, valeurs 9 ou 10).
  let nclcCible: 9 | 10 = 9;
  if (body.nclc_cible !== undefined) {
@ -169,13 +202,15 @@ corrections.post("/eo", authMiddleware, async (c) => {
      simulationId: body.simulationId,
      tache: body.tache as "EO_T1" | "EO_T3",
      nclcCible,
-      transcript: body.transcript,
+      transcript: hasTranscript ? (body.transcript as string) : undefined,
+      audioBase64: hasAudio ? (body.audioBase64 as string) : undefined,
+      mimeType: hasAudio ? (body.mimeType as string) : undefined,
    },
    profile,
  );

  if ("error" in result) {
-    return c.json(result, result.status as 401 | 404 | 500);
+    return c.json(result, result.status as 400 | 401 | 404 | 500);
  }

  return c.json(result.data, 200);