feat(eo): restore audioBase64 mode for Gemini batch transcription

- POST /corrections/eo accepts audioBase64 + mimeType (XOR with transcript)
- Gemini transcribeAudio called server-side before correction
- No audio storage (client downloads locally)
- /transcriptions/token kept for future Deepgram live use

Typecheck: OK · Tests: all green

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hermann_Kitio 2026-04-25 05:59:53 +03:00
parent 14880fe94c
commit 8f8a900449
4 changed files with 352 additions and 31 deletions

View file

@ -129,7 +129,7 @@ function createSupabaseMock(production: ProductionRow | null) {
// ── Tests ──────────────────────────────────────────────────────────────── // ── Tests ────────────────────────────────────────────────────────────────
describe("correctionController.correctEO — Sprint 4b (transcript-only)", () => { describe("correctionController.correctEO — Sprint 4b.2 (transcript ou audio batch)", () => {
beforeEach(() => { beforeEach(() => {
vi.resetModules(); vi.resetModules();
vi.restoreAllMocks(); vi.restoreAllMocks();
@ -307,4 +307,184 @@ describe("correctionController.correctEO — Sprint 4b (transcript-only)", () =>
); );
expect(persisted!.data.nclc_cible).toBe(10); expect(persisted!.data.nclc_cible).toBe(10);
}); });
// ── Mode audio batch (Sprint 4b.2) ────────────────────────────────────
// Audio batch, happy path: the base64 payload is handed to the Gemini
// transcriber, and the text it returns is what the EO corrector receives and
// what ends up persisted as the production content.
it("mode audio : transcrit via Gemini puis utilise le transcript pour la correction", async () => {
  const geminiTranscript = "Bonjour, je m'appelle Marie.";

  const { mock: supabaseMock, updates: recordedUpdates } = createSupabaseMock({
    id: "sim-audio-1",
    user_id: "user-1",
    tache: "EO_T1",
    sujet_id: null,
  });
  vi.doMock("../../lib/supabase", () => ({ supabase: supabaseMock }));

  // Stubbed DeepSeek layer: only correctEO is inspected afterwards.
  const modeleStub = {
    production_modele_propre: "t",
    notes_pedagogiques: [],
    transformations: [],
    message: "",
    nclc_modele: 9,
    nclc_obtenu: 8,
    score_cible: 14,
    tcf_word_count: 1,
    tcf_word_min: 200,
    tcf_word_max: 300,
    tcf_truncated: false,
  };
  const deepseekCorrectEO = vi.fn().mockResolvedValue(VALID_RAPPORT_EO);
  vi.doMock("../../lib/deepseek", () => ({
    correctEE: vi.fn(),
    correctEO: deepseekCorrectEO,
    generateProductionModele: vi.fn().mockResolvedValue(modeleStub),
    generateExercices: vi.fn().mockResolvedValue([]),
  }));

  // Stubbed Gemini layer: the MIME type is accepted and transcription succeeds.
  const geminiTranscribe = vi.fn().mockResolvedValue(geminiTranscript);
  const geminiAcceptsMime = vi.fn().mockReturnValue(true);
  vi.doMock("../../lib/gemini", () => ({
    transcribeAudio: geminiTranscribe,
    isAcceptedAudioMime: geminiAcceptsMime,
  }));

  const controller = await import("../correctionController");
  const outcome = await controller.correctEO(
    {
      simulationId: "sim-audio-1",
      tache: "EO_T1",
      nclcCible: 9,
      audioBase64: "AAAA",
      mimeType: "audio/webm",
    },
    PROFILE,
  );

  expect("data" in outcome).toBe(true);
  expect(geminiTranscribe).toHaveBeenCalledWith("AAAA", "audio/webm");
  expect(deepseekCorrectEO).toHaveBeenCalledWith(
    geminiTranscript,
    "EO_T1",
    9,
    null,
  );

  const scoredUpdate = recordedUpdates.find(
    (entry) => entry.table === "productions" && entry.data.score !== undefined,
  );
  const persistedData = scoredUpdate!.data;
  expect(persistedData.contenu).toBe(geminiTranscript);
  // No audio_url: the backend does not store any audio.
  expect(persistedData.audio_url).toBeUndefined();
});
// A MIME type rejected by isAcceptedAudioMime must produce a 400
// VALIDATION_ERROR, and the audio must never be sent to the transcriber.
it("mimeType non accepté → VALIDATION_ERROR 400", async () => {
  const { mock } = createSupabaseMock({
    id: "sim-audio-2",
    user_id: "user-1",
    tache: "EO_T1",
    sujet_id: null,
  });
  vi.doMock("../../lib/supabase", () => ({ supabase: mock }));
  vi.doMock("../../lib/deepseek", () => ({
    correctEE: vi.fn(),
    correctEO: vi.fn(),
    generateProductionModele: vi.fn(),
    generateExercices: vi.fn(),
  }));
  // Kept in a local so the test can verify that validation short-circuits
  // before any transcription attempt.
  const transcribeAudio = vi.fn();
  vi.doMock("../../lib/gemini", () => ({
    transcribeAudio,
    isAcceptedAudioMime: vi.fn().mockReturnValue(false),
  }));
  const { correctEO } = await import("../correctionController");
  const result = await correctEO(
    {
      simulationId: "sim-audio-2",
      tache: "EO_T1",
      nclcCible: 9,
      audioBase64: "AAAA",
      mimeType: "audio/ogg",
    },
    PROFILE,
  );
  expect("error" in result).toBe(true);
  if ("error" in result) {
    expect(result.code).toBe("VALIDATION_ERROR");
    expect(result.status).toBe(400);
  }
  expect(transcribeAudio).not.toHaveBeenCalled();
});
// When the transcriber rejects, the controller must answer with a 500
// INTERNAL_ERROR and must not go on to the EO correction step.
it("transcription Gemini échoue → INTERNAL_ERROR 500", async () => {
  // The controller logs the failure through console.error; silence it so the
  // expected error does not pollute the test output. The spy is undone by the
  // vi.restoreAllMocks() call in beforeEach.
  vi.spyOn(console, "error").mockImplementation(() => {});

  const { mock } = createSupabaseMock({
    id: "sim-audio-3",
    user_id: "user-1",
    tache: "EO_T1",
    sujet_id: null,
  });
  vi.doMock("../../lib/supabase", () => ({ supabase: mock }));
  // Kept in a local so the test can verify that correction never starts.
  const correctEOSpy = vi.fn();
  vi.doMock("../../lib/deepseek", () => ({
    correctEE: vi.fn(),
    correctEO: correctEOSpy,
    generateProductionModele: vi.fn(),
    generateExercices: vi.fn(),
  }));
  vi.doMock("../../lib/gemini", () => ({
    transcribeAudio: vi.fn().mockRejectedValue(new Error("Gemini timeout")),
    isAcceptedAudioMime: vi.fn().mockReturnValue(true),
  }));
  const { correctEO } = await import("../correctionController");
  const result = await correctEO(
    {
      simulationId: "sim-audio-3",
      tache: "EO_T1",
      nclcCible: 9,
      audioBase64: "AAAA",
      mimeType: "audio/webm",
    },
    PROFILE,
  );
  expect("error" in result).toBe(true);
  if ("error" in result) {
    expect(result.code).toBe("INTERNAL_ERROR");
    expect(result.status).toBe(500);
  }
  expect(correctEOSpy).not.toHaveBeenCalled();
});
// With neither a transcript nor an audio payload, the controller has nothing
// to correct and must reject the call with a 400 VALIDATION_ERROR.
it("ni transcript ni audioBase64 → VALIDATION_ERROR 400", async () => {
  const { mock } = createSupabaseMock({
    id: "sim-audio-4",
    user_id: "user-1",
    tache: "EO_T1",
    sujet_id: null,
  });
  vi.doMock("../../lib/supabase", () => ({ supabase: mock }));
  vi.doMock("../../lib/deepseek", () => ({
    correctEE: vi.fn(),
    correctEO: vi.fn(),
    generateProductionModele: vi.fn(),
    generateExercices: vi.fn(),
  }));
  vi.doMock("../../lib/gemini", () => ({
    transcribeAudio: vi.fn(),
    isAcceptedAudioMime: vi.fn(),
  }));
  const { correctEO } = await import("../correctionController");
  const result = await correctEO(
    {
      simulationId: "sim-audio-4",
      tache: "EO_T1",
      nclcCible: 9,
    },
    PROFILE,
  );
  expect("error" in result).toBe(true);
  if ("error" in result) {
    expect(result.code).toBe("VALIDATION_ERROR");
    // The test title promises a 400; pin it like the MIME-rejection test does.
    expect(result.status).toBe(400);
  }
});
}); });

View file

@ -30,6 +30,7 @@ import {
type TacheCorrection, type TacheCorrection,
} from "../lib/deepseek.js"; } from "../lib/deepseek.js";
import { PLANS, type Plan } from "../lib/access.js"; import { PLANS, type Plan } from "../lib/access.js";
import { transcribeAudio, isAcceptedAudioMime } from "../lib/gemini.js";
import type { AuthProfile } from "../middleware/auth.js"; import type { AuthProfile } from "../middleware/auth.js";
type CorrectionError = { type CorrectionError = {
@ -311,20 +312,23 @@ async function runExercicesJob(input: ExercicesJobInput): Promise<void> {
} }
} }
// ── EO — Sprint 4b : transcript-only (audio géré côté frontend) ───────── // ── EO — Sprint 4b.2 : transcript OU audio batch (Gemini) ──────────────
// //
// Décision Sprint 4b : Deepgram en connexion directe navigateur ↔ Deepgram via // Bascule Sprint 4b.2 : abandon de Deepgram live au profit de Gemini batch
// token éphémère (cf. /transcriptions/token). Le backend reçoit uniquement le // côté serveur. Le frontend envoie soit un transcript déjà constitué, soit
// transcript final ; aucun audio n'est stocké côté serveur. // l'audio brut en base64 — auquel cas le backend appelle `transcribeAudio`
// (Gemini) avant de poursuivre le pipeline correction. L'audio n'est PAS
// stocké côté serveur ; le client en garde une copie locale s'il le souhaite.
// //
// Flux POST /corrections/eo : // Flux POST /corrections/eo :
// 1. Vérifier que la production existe, appartient à l'utilisateur. // 1. Vérifier production + ownership.
// 2. Charger la consigne (utile au prompt EO). // 2. Charger la consigne (utile au prompt EO).
// 3. Lancer correction EO + modèle EO en parallèle (mêmes patterns que EE). // 3. Mode A (transcript direct) : passer.
// 4. Persister le rapport (revelation, diagnostic, conseil_nclc, erreurs_codes, // Mode B (audioBase64) : valider MIME → transcribeAudio → transcript.
// contenu = transcript). // 4. Lancer correction EO + modèle EO en parallèle (mêmes patterns que EE).
// 5. Lancer les exercices fire-and-forget. // 5. Persister le rapport (contenu = transcript).
// 6. Incrémenter le quota. // 6. Lancer les exercices fire-and-forget.
// 7. Incrémenter le quota.
// //
// Le risque race-condition décrit dans correctEE s'applique aussi ici : on ne // Le risque race-condition décrit dans correctEE s'applique aussi ici : on ne
// touche PAS aux colonnes *_status dans l'update final. // touche PAS aux colonnes *_status dans l'update final.
@ -333,7 +337,12 @@ export interface CorrectEOInput {
simulationId: string; simulationId: string;
tache: TacheEO; tache: TacheEO;
nclcCible: NclcCible; nclcCible: NclcCible;
transcript: string; /** Transcript texte fourni directement par le client (mode A). */
transcript?: string;
/** Audio brut en base64 (mode B — Gemini transcrit côté serveur). */
audioBase64?: string;
/** MIME du payload audio quand audioBase64 est fourni. */
mimeType?: string;
} }
export async function correctEO( export async function correctEO(
@ -342,7 +351,7 @@ export async function correctEO(
): Promise< ): Promise<
{ data: CorrectionRapport & { simulation_id: string } } | CorrectionError { data: CorrectionRapport & { simulation_id: string } } | CorrectionError
> { > {
const { simulationId, tache, nclcCible, transcript } = input; const { simulationId, tache, nclcCible } = input;
// 1. Vérifier la production + ownership. // 1. Vérifier la production + ownership.
const { data: production, error: fetchError } = await supabase const { data: production, error: fetchError } = await supabase
@ -382,7 +391,46 @@ export async function correctEO(
} }
} }
// 3. Lancer correction EO + modèle EO en parallèle. // 3. Mode batch audio : transcrire d'abord. Mode transcript direct : passer.
// Resolve the transcript that feeds the correction step.
//  - Audio mode (audioBase64 + mimeType both set): check the MIME type with
//    isAcceptedAudioMime, then let transcribeAudio produce the text.
//  - Transcript mode: use the client-provided string as is.
//  - Neither: reject with a 400 VALIDATION_ERROR.
let transcript: string;
if (input.audioBase64 && input.mimeType) {
  if (!isAcceptedAudioMime(input.mimeType)) {
    return {
      error: true,
      code: "VALIDATION_ERROR",
      message:
        "mimeType non supporté. Valeurs acceptées : audio/webm, audio/mp4, audio/wav.",
      status: 400,
    };
  }
  try {
    transcript = await transcribeAudio(input.audioBase64, input.mimeType);
  } catch (err) {
    // Log the underlying cause server-side; the client only gets a generic
    // retry message.
    const message = err instanceof Error ? err.message : String(err);
    console.error("[correctionController.correctEO] transcription failed", {
      simulationId,
      message,
    });
    return {
      error: true,
      code: "INTERNAL_ERROR",
      message:
        "Impossible de transcrire l'audio. Veuillez réessayer dans quelques instants.",
      status: 500,
    };
  }
  // A blank transcription (e.g. a silent recording) leaves nothing to
  // correct: stop here rather than running the later steps on empty text.
  if (transcript.trim().length === 0) {
    return {
      error: true,
      code: "VALIDATION_ERROR",
      message:
        "Aucune parole détectée dans l'audio. Veuillez réenregistrer votre réponse.",
      status: 400,
    };
  }
} else if (typeof input.transcript === "string") {
  transcript = input.transcript;
} else {
  return {
    error: true,
    code: "VALIDATION_ERROR",
    message: "Fournir soit `transcript`, soit `audioBase64` + `mimeType`.",
    status: 400,
  };
}
// 4. Lancer correction EO + modèle EO en parallèle.
const correctionPromise = deepseekCorrectEO( const correctionPromise = deepseekCorrectEO(
transcript, transcript,
tache, tache,

View file

@ -73,7 +73,7 @@ describe("POST /corrections/eo — Sprint 4a", () => {
expect(res.status).toBe(400); expect(res.status).toBe(400);
}); });
it("400 si transcript manquant", async () => { it("400 si ni transcript ni audioBase64 fournis", async () => {
const app = buildApp(); const app = buildApp();
const res = await app.request("/corrections/eo", { const res = await app.request("/corrections/eo", {
method: "POST", method: "POST",
@ -85,6 +85,36 @@ describe("POST /corrections/eo — Sprint 4a", () => {
expect(body.code).toBe("VALIDATION_ERROR"); expect(body.code).toBe("VALIDATION_ERROR");
}); });
// Sending both a transcript and an audio payload violates the XOR rule of
// POST /corrections/eo and must be rejected as a validation error.
it("400 si transcript ET audioBase64 fournis simultanément (XOR)", async () => {
  const app = buildApp();
  const res = await app.request("/corrections/eo", {
    method: "POST",
    headers: JSON_HEADERS,
    body: JSON.stringify({
      simulationId: "s1",
      tache: "EO_T1",
      transcript: "t",
      audioBase64: "AAAA",
      mimeType: "audio/webm",
    }),
  });
  expect(res.status).toBe(400);
  // Also pin the error code, as the "neither provided" test does.
  const body = (await res.json()) as { code?: string };
  expect(body.code).toBe("VALIDATION_ERROR");
});
// An audio payload without its MIME type cannot be transcribed and must be
// rejected as a validation error.
it("400 si audioBase64 sans mimeType", async () => {
  const app = buildApp();
  const res = await app.request("/corrections/eo", {
    method: "POST",
    headers: JSON_HEADERS,
    body: JSON.stringify({
      simulationId: "s1",
      tache: "EO_T1",
      audioBase64: "AAAA",
    }),
  });
  expect(res.status).toBe(400);
  // Also pin the error code, as the "neither provided" test does.
  const body = (await res.json()) as { code?: string };
  expect(body.code).toBe("VALIDATION_ERROR");
});
it("400 si nclc_cible invalide", async () => { it("400 si nclc_cible invalide", async () => {
const app = buildApp(); const app = buildApp();
const res = await app.request("/corrections/eo", { const res = await app.request("/corrections/eo", {
@ -133,6 +163,34 @@ describe("POST /corrections/eo — Sprint 4a", () => {
); );
}); });
// Audio batch mode at the route level: the handler must forward audioBase64
// and mimeType to the controller, with nclcCible defaulting to 9.
it("200 mode batch audio (transmet audioBase64 + mimeType au controller)", async () => {
  const requestPayload = {
    simulationId: "s-audio",
    tache: "EO_T1",
    audioBase64: "AAAA",
    mimeType: "audio/webm",
  };
  correctEOMock.mockResolvedValue({
    data: { score: 14, nclc: 9, simulation_id: "s-audio", diagnostic: "d" },
  });

  const response = await buildApp().request("/corrections/eo", {
    method: "POST",
    headers: JSON_HEADERS,
    body: JSON.stringify(requestPayload),
  });

  expect(response.status).toBe(200);
  // No nclc_cible in the request, so the controller receives the default.
  const expectedInput = expect.objectContaining({
    ...requestPayload,
    nclcCible: 9,
  });
  expect(correctEOMock).toHaveBeenCalledWith(expectedInput, expect.any(Object));
});
it("200 avec nclc_cible=10 transmis au controller", async () => { it("200 avec nclc_cible=10 transmis au controller", async () => {
correctEOMock.mockResolvedValue({ correctEOMock.mockResolvedValue({
data: { score: 16, nclc: 10, simulation_id: "s2", diagnostic: "d" }, data: { score: 16, nclc: 10, simulation_id: "s2", diagnostic: "d" },

View file

@ -91,15 +91,19 @@ corrections.post("/ee", authMiddleware, async (c) => {
return c.json(result.data, 200); return c.json(result.data, 200);
}); });
// Sprint 4b — POST /corrections/eo reçoit uniquement le transcript final. // Sprint 4b.2 — POST /corrections/eo accepte SOIT un transcript texte
// La transcription live est gérée navigateur ↔ Deepgram (cf. /transcriptions/token). // SOIT un audio base64 + mimeType (transcrit côté backend via Gemini).
// Aucun audio n'est stocké côté backend. // Aucun audio n'est stocké côté serveur ; le client garde une copie locale.
// Maximum accepted length, in characters, of a base64 audio payload: 14 Mi
// characters. Base64 spends 4 characters per 3 bytes, so this is about
// 10.5 MiB of decoded audio.
const MAX_AUDIO_BASE64_LEN = 14 * 2 ** 20;
corrections.post("/eo", authMiddleware, async (c) => { corrections.post("/eo", authMiddleware, async (c) => {
let body: { let body: {
simulationId?: unknown; simulationId?: unknown;
transcript?: unknown; transcript?: unknown;
tache?: unknown; tache?: unknown;
nclc_cible?: unknown; nclc_cible?: unknown;
audioBase64?: unknown;
mimeType?: unknown;
}; };
try { try {
body = await c.req.json(); body = await c.req.json();
@ -125,17 +129,6 @@ corrections.post("/eo", authMiddleware, async (c) => {
); );
} }
if (!body.transcript || typeof body.transcript !== "string") {
return c.json(
{
error: true,
code: "VALIDATION_ERROR",
message: "transcript est requis.",
},
400,
);
}
if (!body.tache || !VALID_TACHES_EO.includes(body.tache as string)) { if (!body.tache || !VALID_TACHES_EO.includes(body.tache as string)) {
return c.json( return c.json(
{ {
@ -147,6 +140,46 @@ corrections.post("/eo", authMiddleware, async (c) => {
); );
} }
// Input mode: exactly one of `transcript` or `audioBase64` (+ `mimeType`)
// must be present as a non-empty string. Both or neither is a 400.
const hasTranscript =
  typeof body.transcript === "string" && body.transcript !== "";
const hasAudio =
  typeof body.audioBase64 === "string" && body.audioBase64 !== "";
const exactlyOneSource = hasTranscript !== hasAudio;
if (!exactlyOneSource) {
  return c.json(
    {
      error: true,
      code: "VALIDATION_ERROR",
      message:
        "Fournir exactement un des deux : `transcript` (texte) ou `audioBase64` + `mimeType` (audio).",
    },
    400,
  );
}

// Audio mode needs a non-empty MIME type string alongside the payload.
const mimeTypeMissing =
  typeof body.mimeType !== "string" || body.mimeType === "";
if (hasAudio && mimeTypeMissing) {
  return c.json(
    {
      error: true,
      code: "VALIDATION_ERROR",
      message: "`mimeType` est requis quand `audioBase64` est fourni.",
    },
    400,
  );
}

// Audio mode: reject payloads longer than the configured base64 limit.
if (hasAudio && (body.audioBase64 as string).length > MAX_AUDIO_BASE64_LEN) {
  return c.json(
    {
      error: true,
      code: "VALIDATION_ERROR",
      message: "Audio trop volumineux (max ~10 Mo).",
    },
    413,
  );
}
// nclc_cible optionnel (défaut 9, valeurs 9 ou 10). // nclc_cible optionnel (défaut 9, valeurs 9 ou 10).
let nclcCible: 9 | 10 = 9; let nclcCible: 9 | 10 = 9;
if (body.nclc_cible !== undefined) { if (body.nclc_cible !== undefined) {
@ -169,13 +202,15 @@ corrections.post("/eo", authMiddleware, async (c) => {
simulationId: body.simulationId, simulationId: body.simulationId,
tache: body.tache as "EO_T1" | "EO_T3", tache: body.tache as "EO_T1" | "EO_T3",
nclcCible, nclcCible,
transcript: body.transcript, transcript: hasTranscript ? (body.transcript as string) : undefined,
audioBase64: hasAudio ? (body.audioBase64 as string) : undefined,
mimeType: hasAudio ? (body.mimeType as string) : undefined,
}, },
profile, profile,
); );
if ("error" in result) { if ("error" in result) {
return c.json(result, result.status as 401 | 404 | 500); return c.json(result, result.status as 400 | 401 | 404 | 500);
} }
return c.json(result.data, 200); return c.json(result.data, 200);