feat(eo): restore audioBase64 mode for Gemini batch transcription

- POST /corrections/eo accepts audioBase64 + mimeType (XOR with transcript)
- Gemini transcribeAudio called server-side before correction
- No audio storage (client downloads locally)
- /transcriptions/token kept for future Deepgram live use

Typecheck: OK · Tests: all green

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 05:59:53 +03:00 · 2026-04-25 05:59:53 +03:00 · 8f8a900449
commit 8f8a900449
parent 14880fe94c
4 changed files with 352 additions and 31 deletions
--- a/src/controllers/tests/correctEO.test.ts
+++ b/src/controllers/tests/correctEO.test.ts
@ -129,7 +129,7 @@ function createSupabaseMock(production: ProductionRow | null) {
 // ── Tests ────────────────────────────────────────────────────────────────
-describe("correctionController.correctEO — Sprint 4b (transcript-only)", () => {
+describe("correctionController.correctEO — Sprint 4b.2 (transcript ou audio batch)", () => {
  beforeEach(() => {
    vi.resetModules();
    vi.restoreAllMocks();
@ -307,4 +307,184 @@ describe("correctionController.correctEO — Sprint 4b (transcript-only)", () =>
    );
    expect(persisted!.data.nclc_cible).toBe(10);
  });
  // ── Mode audio batch (Sprint 4b.2) ────────────────────────────────────
  it("mode audio : transcrit via Gemini puis utilise le transcript pour la correction", async () => {
    // Audio mode: Gemini must be called with the raw payload, and its output
    // must become both the correction input and the persisted `contenu`.
    const simulationId = "sim-audio-1";
    const geminiTranscript = "Bonjour, je m'appelle Marie.";

    const supabaseFixture = createSupabaseMock({
      id: simulationId,
      user_id: "user-1",
      tache: "EO_T1",
      sujet_id: null,
    });
    vi.doMock("../../lib/supabase", () => ({
      supabase: supabaseFixture.mock,
    }));

    // Spy kept outside the factory so the call arguments can be asserted.
    const correctEOSpy = vi.fn().mockResolvedValue(VALID_RAPPORT_EO);
    vi.doMock("../../lib/deepseek", () => {
      const generateProductionModele = vi.fn().mockResolvedValue({
        production_modele_propre: "t",
        notes_pedagogiques: [],
        transformations: [],
        message: "",
        nclc_modele: 9,
        nclc_obtenu: 8,
        score_cible: 14,
        tcf_word_count: 1,
        tcf_word_min: 200,
        tcf_word_max: 300,
        tcf_truncated: false,
      });
      return {
        correctEE: vi.fn(),
        correctEO: correctEOSpy,
        generateProductionModele,
        generateExercices: vi.fn().mockResolvedValue([]),
      };
    });

    const transcribeSpy = vi.fn().mockResolvedValue(geminiTranscript);
    const mimeGuardSpy = vi.fn().mockReturnValue(true);
    vi.doMock("../../lib/gemini", () => ({
      transcribeAudio: transcribeSpy,
      isAcceptedAudioMime: mimeGuardSpy,
    }));

    // Import only after every doMock call so the controller sees the mocks.
    const controller = await import("../correctionController");
    const outcome = await controller.correctEO(
      {
        simulationId,
        tache: "EO_T1",
        nclcCible: 9,
        audioBase64: "AAAA",
        mimeType: "audio/webm",
      },
      PROFILE,
    );

    expect("data" in outcome).toBe(true);
    expect(transcribeSpy).toHaveBeenCalledWith("AAAA", "audio/webm");
    expect(correctEOSpy).toHaveBeenCalledWith(
      geminiTranscript,
      "EO_T1",
      9,
      null,
    );

    const persisted = supabaseFixture.updates.find(
      (entry) =>
        entry.table === "productions" && entry.data.score !== undefined,
    );
    expect(persisted!.data.contenu).toBe(geminiTranscript);
    // No audio_url: the backend does not store any audio.
    expect(persisted!.data.audio_url).toBeUndefined();
  });
  it("mimeType non accepté → VALIDATION_ERROR 400", async () => {
    // The MIME guard is mocked to reject, so the controller must answer with
    // a validation error.
    const { mock: supabaseStub } = createSupabaseMock({
      id: "sim-audio-2",
      user_id: "user-1",
      tache: "EO_T1",
      sujet_id: null,
    });
    vi.doMock("../../lib/supabase", () => ({ supabase: supabaseStub }));
    vi.doMock("../../lib/deepseek", () => ({
      correctEE: vi.fn(),
      correctEO: vi.fn(),
      generateProductionModele: vi.fn(),
      generateExercices: vi.fn(),
    }));
    vi.doMock("../../lib/gemini", () => ({
      transcribeAudio: vi.fn(),
      isAcceptedAudioMime: vi.fn().mockReturnValue(false),
    }));

    // Import only after every doMock call so the controller sees the mocks.
    const controller = await import("../correctionController");
    const outcome = await controller.correctEO(
      {
        simulationId: "sim-audio-2",
        tache: "EO_T1",
        nclcCible: 9,
        audioBase64: "AAAA",
        mimeType: "audio/ogg",
      },
      PROFILE,
    );

    expect("error" in outcome).toBe(true);
    // Guard clause narrows the union; the expectation above already fails the
    // test when the result is not an error.
    if (!("error" in outcome)) {
      return;
    }
    expect(outcome.code).toBe("VALIDATION_ERROR");
    expect(outcome.status).toBe(400);
  });
  it("transcription Gemini échoue → INTERNAL_ERROR 500", async () => {
    // The MIME type is accepted but the transcription call rejects: the
    // controller must turn the rejection into an INTERNAL_ERROR result.
    const { mock: supabaseStub } = createSupabaseMock({
      id: "sim-audio-3",
      user_id: "user-1",
      tache: "EO_T1",
      sujet_id: null,
    });
    vi.doMock("../../lib/supabase", () => ({ supabase: supabaseStub }));
    vi.doMock("../../lib/deepseek", () => ({
      correctEE: vi.fn(),
      correctEO: vi.fn(),
      generateProductionModele: vi.fn(),
      generateExercices: vi.fn(),
    }));
    vi.doMock("../../lib/gemini", () => ({
      transcribeAudio: vi.fn().mockRejectedValue(new Error("Gemini timeout")),
      isAcceptedAudioMime: vi.fn().mockReturnValue(true),
    }));

    // Import only after every doMock call so the controller sees the mocks.
    const controller = await import("../correctionController");
    const outcome = await controller.correctEO(
      {
        simulationId: "sim-audio-3",
        tache: "EO_T1",
        nclcCible: 9,
        audioBase64: "AAAA",
        mimeType: "audio/webm",
      },
      PROFILE,
    );

    expect("error" in outcome).toBe(true);
    // Guard clause narrows the union; the expectation above already fails the
    // test when the result is not an error.
    if (!("error" in outcome)) {
      return;
    }
    expect(outcome.code).toBe("INTERNAL_ERROR");
    expect(outcome.status).toBe(500);
  });
  it("ni transcript ni audioBase64 → VALIDATION_ERROR 400", async () => {
    // Neither input mode is supplied: the controller must reject the call
    // with a validation error.
    const { mock } = createSupabaseMock({
      id: "sim-audio-4",
      user_id: "user-1",
      tache: "EO_T1",
      sujet_id: null,
    });
    vi.doMock("../../lib/supabase", () => ({ supabase: mock }));
    vi.doMock("../../lib/deepseek", () => ({
      correctEE: vi.fn(),
      correctEO: vi.fn(),
      generateProductionModele: vi.fn(),
      generateExercices: vi.fn(),
    }));
    vi.doMock("../../lib/gemini", () => ({
      transcribeAudio: vi.fn(),
      isAcceptedAudioMime: vi.fn(),
    }));
    // Import only after every doMock call so the controller sees the mocks.
    const { correctEO } = await import("../correctionController");
    const result = await correctEO(
      {
        simulationId: "sim-audio-4",
        tache: "EO_T1",
        nclcCible: 9,
      },
      PROFILE,
    );
    expect("error" in result).toBe(true);
    if ("error" in result) {
      expect(result.code).toBe("VALIDATION_ERROR");
      // The test title promises a 400; assert it like the sibling
      // validation test does, so a wrong status cannot slip through.
      expect(result.status).toBe(400);
    }
  });
 });
--- a/src/controllers/correctionController.ts
+++ b/src/controllers/correctionController.ts
@ -30,6 +30,7 @@ import {
  type TacheCorrection,
 } from "../lib/deepseek.js";
 import { PLANS, type Plan } from "../lib/access.js";
 import { transcribeAudio, isAcceptedAudioMime } from "../lib/gemini.js";
 import type { AuthProfile } from "../middleware/auth.js";
 type CorrectionError = {
@ -311,20 +312,23 @@ async function runExercicesJob(input: ExercicesJobInput): Promise<void> {
  }
 }
-// ── EO — Sprint 4b : transcript-only (audio géré côté frontend) ─────────
+// ── EO — Sprint 4b.2 : transcript OU audio batch (Gemini) ──────────────
 //
-// Décision Sprint 4b : Deepgram en connexion directe navigateur ↔ Deepgram via
+// Bascule Sprint 4b.2 : abandon de Deepgram live au profit de Gemini batch
-// token éphémère (cf. /transcriptions/token). Le backend reçoit uniquement le
+// côté serveur. Le frontend envoie soit un transcript déjà constitué, soit
-// transcript final ; aucun audio n'est stocké côté serveur.
+// l'audio brut en base64 — auquel cas le backend appelle `transcribeAudio`
 // (Gemini) avant de poursuivre le pipeline correction. L'audio n'est PAS
 // stocké côté serveur ; le client en garde une copie locale s'il le souhaite.
 //
 // Flux POST /corrections/eo :
-//   1. Vérifier que la production existe, appartient à l'utilisateur.
+//   1. Vérifier production + ownership.
 //   2. Charger la consigne (utile au prompt EO).
-//   3. Lancer correction EO + modèle EO en parallèle (mêmes patterns que EE).
+//   3. Mode A (audioBase64) : valider MIME → transcribeAudio → transcript.
-//   4. Persister le rapport (revelation, diagnostic, conseil_nclc, erreurs_codes,
+//      Mode B (transcript direct) : passer.
-//      contenu = transcript).
+//   4. Lancer correction EO + modèle EO en parallèle (mêmes patterns que EE).
-//   5. Lancer les exercices fire-and-forget.
+//   5. Persister le rapport (contenu = transcript).
-//   6. Incrémenter le quota.
+//   6. Lancer les exercices fire-and-forget.
 //   7. Incrémenter le quota.
 //
 // Le risque race-condition décrit dans correctEE s'applique aussi ici : on ne
 // touche PAS aux colonnes *_status dans l'update final.
@ -333,7 +337,12 @@ export interface CorrectEOInput {
  simulationId: string;
  tache: TacheEO;
  nclcCible: NclcCible;
-  transcript: string;
+  /** Transcript texte fourni directement par le client (mode B). */
  transcript?: string;
  /** Audio brut en base64 (mode A — Gemini transcrit côté serveur). */
  audioBase64?: string;
  /** MIME du payload audio quand audioBase64 est fourni. */
  mimeType?: string;
 }
 export async function correctEO(
@ -342,7 +351,7 @@ export async function correctEO(
 ): Promise<
  { data: CorrectionRapport & { simulation_id: string } } | CorrectionError
 > {
-  const { simulationId, tache, nclcCible, transcript } = input;
+  const { simulationId, tache, nclcCible } = input;
  // 1. Vérifier la production + ownership.
  const { data: production, error: fetchError } = await supabase
@ -382,7 +391,46 @@ export async function correctEO(
    }
  }
-  // 3. Lancer correction EO + modèle EO en parallèle.
+  // 3. Mode batch audio : transcrire d'abord. Mode transcript direct : passer.
  let transcript: string;
  if (input.audioBase64 && input.mimeType) {
    if (!isAcceptedAudioMime(input.mimeType)) {
      return {
        error: true,
        code: "VALIDATION_ERROR",
        message:
          "mimeType non supporté. Valeurs acceptées : audio/webm, audio/mp4, audio/wav.",
        status: 400,
      };
    }
    try {
      transcript = await transcribeAudio(input.audioBase64, input.mimeType);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      console.error("[correctionController.correctEO] transcription failed", {
        simulationId,
        message,
      });
      return {
        error: true,
        code: "INTERNAL_ERROR",
        message:
          "Impossible de transcrire l'audio. Veuillez réessayer dans quelques instants.",
        status: 500,
      };
    }
  } else if (typeof input.transcript === "string") {
    transcript = input.transcript;
  } else {
    return {
      error: true,
      code: "VALIDATION_ERROR",
      message: "Fournir soit `transcript`, soit `audioBase64` + `mimeType`.",
      status: 400,
    };
  }
  // 4. Lancer correction EO + modèle EO en parallèle.
  const correctionPromise = deepseekCorrectEO(
    transcript,
    tache,
--- a/src/routes/tests/correctionsEO.test.ts
+++ b/src/routes/tests/correctionsEO.test.ts
@ -73,7 +73,7 @@ describe("POST /corrections/eo — Sprint 4a", () => {
    expect(res.status).toBe(400);
  });
-  it("400 si transcript manquant", async () => {
+  it("400 si ni transcript ni audioBase64 fournis", async () => {
    const app = buildApp();
    const res = await app.request("/corrections/eo", {
      method: "POST",
@ -85,6 +85,36 @@ describe("POST /corrections/eo — Sprint 4a", () => {
    expect(body.code).toBe("VALIDATION_ERROR");
  });
  it("400 si transcript ET audioBase64 fournis simultanément (XOR)", async () => {
    // Both input modes are present at once; the request must be rejected.
    const payload = {
      simulationId: "s1",
      tache: "EO_T1",
      transcript: "t",
      audioBase64: "AAAA",
      mimeType: "audio/webm",
    };
    const response = await buildApp().request("/corrections/eo", {
      method: "POST",
      headers: JSON_HEADERS,
      body: JSON.stringify(payload),
    });
    expect(response.status).toBe(400);
  });
  it("400 si audioBase64 sans mimeType", async () => {
    // Audio payload without its MIME type; the request must be rejected.
    const payload = {
      simulationId: "s1",
      tache: "EO_T1",
      audioBase64: "AAAA",
    };
    const response = await buildApp().request("/corrections/eo", {
      method: "POST",
      headers: JSON_HEADERS,
      body: JSON.stringify(payload),
    });
    expect(response.status).toBe(400);
  });
  it("400 si nclc_cible invalide", async () => {
    const app = buildApp();
    const res = await app.request("/corrections/eo", {
@ -133,6 +163,34 @@ describe("POST /corrections/eo — Sprint 4a", () => {
    );
  });
  it("200 mode batch audio (transmet audioBase64 + mimeType au controller)", async () => {
    // Stub the controller before building the app, then check that the route
    // forwards the audio fields untouched.
    correctEOMock.mockResolvedValue({
      data: { score: 14, nclc: 9, simulation_id: "s-audio", diagnostic: "d" },
    });
    const payload = {
      simulationId: "s-audio",
      tache: "EO_T1",
      audioBase64: "AAAA",
      mimeType: "audio/webm",
    };
    const response = await buildApp().request("/corrections/eo", {
      method: "POST",
      headers: JSON_HEADERS,
      body: JSON.stringify(payload),
    });
    expect(response.status).toBe(200);

    // `nclc_cible` is absent from the payload, so the controller is expected
    // to receive nclcCible 9.
    const forwardedInput = expect.objectContaining({
      simulationId: "s-audio",
      tache: "EO_T1",
      nclcCible: 9,
      audioBase64: "AAAA",
      mimeType: "audio/webm",
    });
    expect(correctEOMock).toHaveBeenCalledWith(
      forwardedInput,
      expect.any(Object),
    );
  });
  it("200 avec nclc_cible=10 transmis au controller", async () => {
    correctEOMock.mockResolvedValue({
      data: { score: 16, nclc: 10, simulation_id: "s2", diagnostic: "d" },
--- a/src/routes/corrections.ts
+++ b/src/routes/corrections.ts
@ -91,15 +91,19 @@ corrections.post("/ee", authMiddleware, async (c) => {
  return c.json(result.data, 200);
 });
-// Sprint 4b — POST /corrections/eo reçoit uniquement le transcript final.
+// Sprint 4b.2 — POST /corrections/eo accepte SOIT un transcript texte
-// La transcription live est gérée navigateur ↔ Deepgram (cf. /transcriptions/token).
+// SOIT un audio base64 + mimeType (transcrit côté backend via Gemini).
-// Aucun audio n'est stocké côté backend.
+// Aucun audio n'est stocké côté serveur ; le client garde une copie locale.
 /**
  * Maximum accepted length, in characters, of the base64 audio string.
  * Base64 carries 3 bytes per 4 characters, so 14 Mi characters decode to
  * roughly 10.5 MiB of audio.
  */
 const MAX_AUDIO_BASE64_LEN = 14 * (1 << 20);
 corrections.post("/eo", authMiddleware, async (c) => {
  let body: {
    simulationId?: unknown;
    transcript?: unknown;
    tache?: unknown;
    nclc_cible?: unknown;
    audioBase64?: unknown;
    mimeType?: unknown;
  };
  try {
    body = await c.req.json();
@ -125,17 +129,6 @@ corrections.post("/eo", authMiddleware, async (c) => {
    );
  }
  if (!body.transcript || typeof body.transcript !== "string") {
    return c.json(
      {
        error: true,
        code: "VALIDATION_ERROR",
        message: "transcript est requis.",
      },
      400,
    );
  }
  if (!body.tache || !VALID_TACHES_EO.includes(body.tache as string)) {
    return c.json(
      {
@ -147,6 +140,46 @@ corrections.post("/eo", authMiddleware, async (c) => {
    );
  }
  // XOR : transcript OU (audioBase64 + mimeType). Pas les deux, pas aucun.
  const hasTranscript =
    typeof body.transcript === "string" && body.transcript.length > 0;
  const hasAudio =
    typeof body.audioBase64 === "string" && body.audioBase64.length > 0;
  if (hasTranscript === hasAudio) {
    return c.json(
      {
        error: true,
        code: "VALIDATION_ERROR",
        message:
          "Fournir exactement un des deux : `transcript` (texte) ou `audioBase64` + `mimeType` (audio).",
      },
      400,
    );
  }
  if (hasAudio) {
    if (typeof body.mimeType !== "string" || body.mimeType.length === 0) {
      return c.json(
        {
          error: true,
          code: "VALIDATION_ERROR",
          message: "`mimeType` est requis quand `audioBase64` est fourni.",
        },
        400,
      );
    }
    if ((body.audioBase64 as string).length > MAX_AUDIO_BASE64_LEN) {
      return c.json(
        {
          error: true,
          code: "VALIDATION_ERROR",
          message: "Audio trop volumineux (max ~10 Mo).",
        },
        413,
      );
    }
  }
  // nclc_cible optionnel (défaut 9, valeurs 9 ou 10).
  let nclcCible: 9 | 10 = 9;
  if (body.nclc_cible !== undefined) {
@ -169,13 +202,15 @@ corrections.post("/eo", authMiddleware, async (c) => {
      simulationId: body.simulationId,
      tache: body.tache as "EO_T1" | "EO_T3",
      nclcCible,
-      transcript: body.transcript,
+      transcript: hasTranscript ? (body.transcript as string) : undefined,
      audioBase64: hasAudio ? (body.audioBase64 as string) : undefined,
      mimeType: hasAudio ? (body.mimeType as string) : undefined,
    },
    profile,
  );
  if ("error" in result) {
-    return c.json(result, result.status as 401 | 404 | 500);
+    return c.json(result, result.status as 400 | 401 | 404 | 500);
  }
  return c.json(result.data, 200);