From 0662e766d4ad3fcda9ac9bb55af2c8c82ea49b68 Mon Sep 17 00:00:00 2001 From: Hermann_Kitio Date: Mon, 27 Apr 2026 02:25:58 +0300 Subject: [PATCH] =?UTF-8?q?Sprint=206d=20=E2=80=94=20Migrate=20Gemini=20Li?= =?UTF-8?q?ve=20to=20@google/genai=20SDK?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit feat(geminiLive): rewrite with GoogleGenAI SDK (vertexai: true, apiKey) replaces raw WebSocket to generativelanguage.googleapis.com feat(geminiLive): restore full setup config (systemInstruction, inputAudioTranscription, outputAudioTranscription, VAD) fix(geminiLive): buildSetupFrame → SDK config object (no manual JSON) fix(useT2LiveSession): cancelTokenRef for idempotent startDialogue, closeAllRef for stable unmount cleanup chore: add @google/genai@^1.50.1 dependency test: 11 geminiLive tests rewritten with SDK mock 292/292 backend tests green --- package-lock.json | 415 +++++++++++++++++++++++++- package.json | 1 + src/lib/__tests__/geminiLive.test.ts | 308 ++++++++++---------- src/lib/geminiLive.ts | 421 +++++++++++++++------------ src/routes/t2live.ts | 6 +- test-gemini-live.js | 150 ++++++++++ 6 files changed, 970 insertions(+), 331 deletions(-) create mode 100644 test-gemini-live.js diff --git a/package-lock.json b/package-lock.json index fde260d..3f43397 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,7 @@ "name": "expria-backend", "version": "1.0.0", "dependencies": { + "@google/genai": "^1.50.1", "@hono/node-server": "^1.13.7", "@hono/node-ws": "^1.3.0", "@supabase/supabase-js": "^2.49.4", @@ -541,6 +542,29 @@ "node": ">=18" } }, + "node_modules/@google/genai": { + "version": "1.50.1", + "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.50.1.tgz", + "integrity": "sha512-YbkX7H9+1Pt8wOt7DDREy8XSoiL6fRDzZQRyaVBarFf8MR3zHGqVdvM4cLbDXqPhxqvegZShgfxb8kw9C7YhAQ==", + "license": "Apache-2.0", + "dependencies": { + "google-auth-library": "^10.3.0", + "p-retry": "^4.6.2", + "protobufjs": "^7.5.4", + "ws": "^8.18.0" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "@modelcontextprotocol/sdk": "^1.25.2" + }, + "peerDependenciesMeta": { + "@modelcontextprotocol/sdk": { + "optional": true + } + } + }, "node_modules/@hono/node-server": { "version": "1.19.14", "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.14.tgz", @@ -647,6 +671,70 @@ "node": ">=14" } }, + "node_modules/@protobufjs/aspromise": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", + "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/base64": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", + "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/codegen": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", + "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/eventemitter": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", + "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/fetch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", + "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.1", + "@protobufjs/inquire": "^1.1.0" + } + }, + "node_modules/@protobufjs/float": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", + "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/inquire": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", + "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/path": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", + "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/pool": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", + "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/utf8": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", + "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", + "license": "BSD-3-Clause" + }, "node_modules/@rollup/rollup-android-arm-eabi": { "version": "4.60.1", "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.1.tgz", @@ -1117,6 +1205,12 @@ "undici-types": "~6.21.0" } }, + "node_modules/@types/retry": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", + "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==", + "license": "MIT" + }, "node_modules/@types/ws": { "version": "8.18.1", "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", @@ -1275,6 +1369,15 @@ "url": "https://opencollective.com/vitest" } }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/ansi-regex": { "version": "6.2.2", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", @@ -1333,6 +1436,35 @@ "node": "18 || 20 || >=22" } }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/bignumber.js": { + "version": "9.3.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", + "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/brace-expansion": { "version": "5.0.5", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", @@ -1346,6 +1478,12 @@ "node": "18 || 20 || >=22" } }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "license": "BSD-3-Clause" + }, "node_modules/cac": { "version": "6.7.14", "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", @@ -1447,11 +1585,19 @@ "node": ">= 8" } }, + "node_modules/data-uri-to-buffer": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", + "integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, "node_modules/debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "dev": true, "license": "MIT", "dependencies": { "ms": "^2.1.3" @@ -1508,6 +1654,15 @@ "dev": true, "license": "MIT" }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, "node_modules/emoji-regex": { "version": "9.2.2", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", @@ -1614,6 +1769,12 @@ "node": ">=12.0.0" } }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "license": "MIT" + }, "node_modules/fdir": { "version": "6.5.0", "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", @@ -1632,6 +1793,29 @@ } } }, + "node_modules/fetch-blob": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", + "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "paypal", + "url": "https://paypal.me/jimmywarting" + } + ], + "license": "MIT", + "dependencies": { + "node-domexception": "^1.0.0", + "web-streams-polyfill": "^3.0.3" + }, + "engines": { + "node": "^12.20 || >= 14.13" + } + }, "node_modules/foreground-child": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", @@ -1649,6 +1833,18 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/formdata-polyfill": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", + "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==", + "license": "MIT", + "dependencies": { + "fetch-blob": "^3.1.2" + }, + "engines": { + "node": ">=12.20.0" + } + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -1673,6 +1869,34 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/gaxios": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-7.1.4.tgz", + "integrity": "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "node-fetch": "^3.3.2" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/gcp-metadata": { + "version": "8.1.2", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-8.1.2.tgz", + "integrity": "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^7.0.0", + "google-logging-utils": "^1.0.0", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/get-intrinsic": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", @@ -1778,6 +2002,32 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/google-auth-library": { + "version": "10.6.2", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-10.6.2.tgz", + "integrity": "sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw==", + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^7.1.4", + "gcp-metadata": "8.1.2", + "google-logging-utils": "1.1.3", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/google-logging-utils": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-1.1.3.tgz", + "integrity": "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, "node_modules/gopd": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", @@ -1840,6 +2090,19 @@ "dev": true, "license": "MIT" }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/iceberg-js": { "version": "0.8.1", "resolved": "https://registry.npmjs.org/iceberg-js/-/iceberg-js-0.8.1.tgz", @@ -1943,6 +2206,42 @@ "dev": true, "license": "MIT" }, + "node_modules/json-bigint": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "license": "MIT", + "dependencies": { + "bignumber.js": "^9.0.0" + } + }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.1.tgz", + "integrity": "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==", + "license": "MIT", + "dependencies": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/long": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz", + "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==", + "license": "Apache-2.0" + }, "node_modules/loupe": { "version": "3.2.1", "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.1.tgz", @@ -2034,7 +2333,6 @@ "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true, "license": "MIT" }, "node_modules/nanoid": { @@ -2056,6 +2354,44 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "deprecated": "Use your platform's native DOMException instead", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "license": "MIT", + "engines": { + "node": ">=10.5.0" + } + }, + "node_modules/node-fetch": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz", + "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==", + "license": "MIT", + "dependencies": { + "data-uri-to-buffer": "^4.0.0", + "fetch-blob": "^3.1.4", + "formdata-polyfill": "^4.0.10" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/node-fetch" + } + }, "node_modules/object-inspect": { "version": "1.13.4", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", @@ -2068,6 +2404,19 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/p-retry": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz", + "integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==", + "license": "MIT", + "dependencies": { + "@types/retry": "0.12.0", + "retry": "^0.13.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/package-json-from-dist": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", @@ -2168,6 +2517,30 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/protobufjs": { + "version": "7.5.5", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.5.tgz", + "integrity": "sha512-3wY1AxV+VBNW8Yypfd1yQY9pXnqTAN+KwQxL8iYm3/BjKYMNg4i0owhEe26PWDOMaIrzeeF98Lqd5NGz4omiIg==", + "hasInstallScript": true, + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.2", + "@protobufjs/base64": "^1.1.2", + "@protobufjs/codegen": "^2.0.4", + "@protobufjs/eventemitter": "^1.1.0", + "@protobufjs/fetch": "^1.1.0", + "@protobufjs/float": "^1.0.2", + "@protobufjs/inquire": "^1.1.0", + "@protobufjs/path": "^1.1.2", + "@protobufjs/pool": "^1.1.0", + "@protobufjs/utf8": "^1.1.0", + "@types/node": ">=13.7.0", + "long": "^5.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/qs": { "version": "6.15.1", "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.1.tgz", @@ -2193,6 +2566,15 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/retry": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", + "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, "node_modules/rollup": { "version": "4.60.1", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.1.tgz", @@ -2238,6 +2620,26 @@ "fsevents": "~2.3.2" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/semver": { "version": "7.7.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", @@ -2833,6 +3235,15 @@ } } }, + "node_modules/web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 0e64c8b..b2fad31 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "test:coverage": "vitest run --coverage" }, "dependencies": { + "@google/genai": "^1.50.1", "@hono/node-server": "^1.13.7", "@hono/node-ws": "^1.3.0", "@supabase/supabase-js": "^2.49.4", diff --git a/src/lib/__tests__/geminiLive.test.ts b/src/lib/__tests__/geminiLive.test.ts index fc59f68..9fa8ce9 100644 --- a/src/lib/__tests__/geminiLive.test.ts +++ b/src/lib/__tests__/geminiLive.test.ts @@ -1,5 +1,50 @@ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; import { EventEmitter } from "node:events"; + +// ─── Mock du SDK @google/genai ─────────────────────────────────────────────── +// +// On capture les callbacks passés à `ai.live.connect` pour pouvoir simuler les +// événements (onopen, onmessage, onerror, onclose) depuis les tests. La +// fabrique `clientFactory` injectée dans openGeminiLiveSession permet de +// remplacer `new GoogleGenAI(...)` par un stub. + +interface CapturedConnect { + model: string; + config: Record; + callbacks: { + onopen?: () => void; + onmessage?: (msg: unknown) => void; + onerror?: (err: unknown) => void; + onclose?: (evt: unknown) => void; + }; + session: { + sendRealtimeInput: ReturnType; + close: ReturnType; + }; +} + +let capturedConnect: CapturedConnect | null = null; + +function makeFakeClient() { + return { + live: { + connect: vi.fn(async (params: CapturedConnect) => { + const session = { + sendRealtimeInput: vi.fn(), + close: vi.fn(), + }; + capturedConnect = { + model: params.model, + config: params.config, + callbacks: params.callbacks, + session, + }; + return session; + }), + }, + }; +} + import { openGeminiLiveSession, buildT2SystemPrompt, @@ -30,6 +75,33 @@ const SUJET_OPTS = { "Vous cherchez un appartement de 2 pièces dans le centre-ville, votre budget est limité et vous souhaitez emménager le mois prochain.", }; +/** Helper : ouvre une session avec un client mocké et retourne la capture. */ +async function openWithMock( + client: FakeWs, + extra: Partial<{ + onSessionEnd: (transcript: string) => void | Promise; + timeoutMs: number; + warningMs: number; + }> = {}, +) { + capturedConnect = null; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + openGeminiLiveSession(client, { + ...SUJET_OPTS, + apiKey: "test-key", + clientFactory: () => makeFakeClient() as any, + ...extra, + }); + // Le `await live.connect()` est dans un `.then()` du code prod ; on laisse + // les microtasks se vider avant de retourner la capture. + await Promise.resolve(); + await Promise.resolve(); + if (!capturedConnect) { + throw new Error("Le mock du SDK n'a pas capturé de connect()"); + } + return capturedConnect; +} + describe("buildT2SystemPrompt", () => { it("substitue role et contexte dans le template", () => { const prompt = buildT2SystemPrompt(SUJET_OPTS); @@ -42,170 +114,120 @@ describe("buildT2SystemPrompt", () => { }); }); -describe("openGeminiLiveSession", () => { - let originalKey: string | undefined; - +describe("openGeminiLiveSession (SDK)", () => { beforeEach(() => { - originalKey = process.env.GEMINI_API_KEY; - process.env.GEMINI_API_KEY = "test-key"; vi.useFakeTimers(); }); afterEach(() => { - if (originalKey === undefined) { - delete process.env.GEMINI_API_KEY; - } else { - process.env.GEMINI_API_KEY = originalKey; - } vi.useRealTimers(); vi.restoreAllMocks(); + capturedConnect = null; }); - it("envoie le setup frame avec prompt dynamique + VAD + transcriptions", () => { + it("appelle live.connect avec le modèle + config Live (audio + system + transcripts + VAD)", async () => { const client = new FakeWs(); - const gemini = new FakeWs(); + const capture = await openWithMock(client); - openGeminiLiveSession(client, { - ...SUJET_OPTS, - geminiFactory: () => gemini, - }); - gemini.emit("open"); - - expect(gemini.sent).toHaveLength(1); - const setup = JSON.parse(gemini.sent[0] as string); - expect(setup.setup.model).toMatch(/gemini/); - expect(setup.setup.systemInstruction.parts[0].text).toContain( + expect(capture.model).toMatch(/gemini/); + const config = capture.config; + expect(config.responseModalities).toContain("AUDIO"); + expect(config.systemInstruction).toContain( "un bailleur qui propose un appartement", ); - expect(setup.setup.generationConfig.responseModalities).toContain("AUDIO"); - expect(setup.setup.inputAudioTranscription).toEqual({}); - expect(setup.setup.outputAudioTranscription).toEqual({}); - expect( - setup.setup.realtimeInputConfig.automaticActivityDetection, - ).toMatchObject({ - disabled: false, - startOfSpeechSensitivity: "START_SENSITIVITY_LOW", - endOfSpeechSensitivity: "END_SENSITIVITY_LOW", - silenceDurationMs: 2000, + expect(config.inputAudioTranscription).toEqual({}); + expect(config.outputAudioTranscription).toEqual({}); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const vad: any = (config.realtimeInputConfig as any) + ?.automaticActivityDetection; + expect(vad?.disabled).toBe(false); + expect(vad?.silenceDurationMs).toBe(2000); + }); + + it("forwarde un chunk audio client {type:'audio'} via session.sendRealtimeInput (PCM 16k base64)", async () => { + const client = new FakeWs(); + const capture = await openWithMock(client); + capture.callbacks.onopen?.(); + + const base64 = "AQIDBA=="; // base64 de [1,2,3,4] + client.emit("message", JSON.stringify({ type: "audio", data: base64 })); + + expect(capture.session.sendRealtimeInput).toHaveBeenCalledTimes(1); + expect(capture.session.sendRealtimeInput).toHaveBeenCalledWith({ + audio: { data: base64, mimeType: "audio/pcm;rate=16000" }, }); }); - it("forwarde un chunk audio client (Buffer) vers Gemini", () => { + it("forwarde un message Gemini (audio inlineData) au client en JSON", async () => { const client = new FakeWs(); - const gemini = new FakeWs(); - openGeminiLiveSession(client, { - ...SUJET_OPTS, - geminiFactory: () => gemini, - }); - gemini.emit("open"); + const capture = await openWithMock(client); + capture.callbacks.onopen?.(); - const audioChunk = Buffer.from([0x01, 0x02, 0x03, 0x04]); - client.emit("message", audioChunk); + const geminiMsg = { + serverContent: { + modelTurn: { + parts: [ + { + inlineData: { data: "EAYE", mimeType: "audio/pcm;rate=24000" }, + }, + ], + }, + }, + }; + capture.callbacks.onmessage?.(geminiMsg); - // [0] = setup, [1] = chunk audio - expect(gemini.sent).toHaveLength(2); - expect(gemini.sent[1]).toBe(audioChunk); - }); - - it("forwarde un chunk audio Gemini (Buffer non-JSON) vers le client sans accumuler de transcript", async () => { - const client = new FakeWs(); - const gemini = new FakeWs(); - const onSessionEnd = vi.fn(); - openGeminiLiveSession(client, { - ...SUJET_OPTS, - geminiFactory: () => gemini, - onSessionEnd, - }); - gemini.emit("open"); - - const examinerAudio = Buffer.from([0x10, 0x20, 0x30]); - gemini.emit("message", examinerAudio); expect(client.sent).toHaveLength(1); - expect(client.sent[0]).toBe(examinerAudio); - - // Fin de session via signal client → transcript vide - client.emit("message", JSON.stringify({ type: "end" })); - await vi.runAllTimersAsync(); - expect(onSessionEnd).toHaveBeenCalledWith(""); + expect(JSON.parse(client.sent[0] as string)).toEqual(geminiMsg); }); - it("accumule inputTranscription et outputTranscription depuis Gemini", async () => { + it("accumule input/outputTranscription et reconstruit le transcript chronologique", async () => { const client = new FakeWs(); - const gemini = new FakeWs(); const onSessionEnd = vi.fn(); - openGeminiLiveSession(client, { - ...SUJET_OPTS, - geminiFactory: () => gemini, - onSessionEnd, - }); - gemini.emit("open"); + const capture = await openWithMock(client, { onSessionEnd }); + capture.callbacks.onopen?.(); - gemini.emit( - "message", - JSON.stringify({ - serverContent: { - inputTranscription: { text: "Bonjour, je voudrais louer." }, - }, - }), - ); - gemini.emit( - "message", - JSON.stringify({ - serverContent: { - outputTranscription: { text: "Bonjour, c’est pour quel quartier ?" }, - }, - }), - ); - gemini.emit( - "message", - JSON.stringify({ - serverContent: { - inputTranscription: { text: "Le centre-ville." }, - }, - }), - ); + capture.callbacks.onmessage?.({ + serverContent: { + inputTranscription: { text: "Bonjour, je voudrais louer." }, + }, + }); + capture.callbacks.onmessage?.({ + serverContent: { + outputTranscription: { text: "Bonjour, c’est pour quel quartier ?" }, + }, + }); + capture.callbacks.onmessage?.({ + serverContent: { inputTranscription: { text: "Le centre-ville." } }, + }); client.emit("message", JSON.stringify({ type: "end" })); await vi.runAllTimersAsync(); expect(onSessionEnd).toHaveBeenCalledTimes(1); - const transcript = onSessionEnd.mock.calls[0][0] as string; - expect(transcript).toBe( + expect(onSessionEnd.mock.calls[0][0]).toBe( "Candidat : Bonjour, je voudrais louer.\nExaminateur : Bonjour, c’est pour quel quartier ?\nCandidat : Le centre-ville.", ); }); - it("ferme Gemini après onSessionEnd, sans fermer le client (réservé à l’appelant)", async () => { + it("ferme la session SDK après onSessionEnd, sans fermer le client", async () => { const client = new FakeWs(); - const gemini = new FakeWs(); const onSessionEnd = vi.fn(); - openGeminiLiveSession(client, { - ...SUJET_OPTS, - geminiFactory: () => gemini, - onSessionEnd, - }); - gemini.emit("open"); + const capture = await openWithMock(client, { onSessionEnd }); + capture.callbacks.onopen?.(); client.emit("message", JSON.stringify({ type: "end" })); await vi.runAllTimersAsync(); - expect(gemini.closed).toBe(true); - expect(gemini.closeCode).toBe(1000); + expect(capture.session.close).toHaveBeenCalledTimes(1); expect(client.closed).toBe(false); }); it("warning à 180 s puis timeout à 210 s déclenche endSession", async () => { const client = new FakeWs(); - const gemini = new FakeWs(); const onSessionEnd = vi.fn(); - openGeminiLiveSession(client, { - ...SUJET_OPTS, - geminiFactory: () => gemini, - onSessionEnd, - }); - gemini.emit("open"); + const capture = await openWithMock(client, { onSessionEnd }); + capture.callbacks.onopen?.(); - // Avancer à 180 s → warning au client await vi.advanceTimersByTimeAsync(180_000); const warningFrame = client.sent.find( (f) => typeof f === "string" && f.includes('"warning"'), @@ -217,22 +239,16 @@ describe("openGeminiLiveSession", () => { }); expect(onSessionEnd).not.toHaveBeenCalled(); - // Avancer à 210 s total → timeout déclenche endSession await vi.advanceTimersByTimeAsync(30_000); expect(onSessionEnd).toHaveBeenCalledTimes(1); - expect(gemini.closed).toBe(true); + expect(capture.session.close).toHaveBeenCalled(); }); - it("signal end client déclenche endSession une seule fois (idempotent)", async () => { + it("signal end client est idempotent (un seul onSessionEnd)", async () => { const client = new FakeWs(); - const gemini = new FakeWs(); const onSessionEnd = vi.fn(); - openGeminiLiveSession(client, { - ...SUJET_OPTS, - geminiFactory: () => gemini, - onSessionEnd, - }); - gemini.emit("open"); + const capture = await openWithMock(client, { onSessionEnd }); + capture.callbacks.onopen?.(); client.emit("message", JSON.stringify({ type: "end" })); client.emit("message", JSON.stringify({ type: "end" })); @@ -241,47 +257,47 @@ describe("openGeminiLiveSession", () => { expect(onSessionEnd).toHaveBeenCalledTimes(1); }); - it("fermeture Gemini avant fin → close client 4006 GEMINI_DISCONNECTED", () => { + it("onclose SDK avant fin → close client 4006 GEMINI_DISCONNECTED", async () => { const client = new FakeWs(); - const gemini = new FakeWs(); - openGeminiLiveSession(client, { - ...SUJET_OPTS, - geminiFactory: () => gemini, - }); - gemini.emit("open"); + const capture = await openWithMock(client); + capture.callbacks.onopen?.(); - gemini.emit("close"); + capture.callbacks.onclose?.({ code: 1000 }); expect(client.closed).toBe(true); expect(client.closeCode).toBe(4006); expect(client.closeReason).toBe("GEMINI_DISCONNECTED"); }); - it("erreur Gemini → close client 4006 GEMINI_DISCONNECTED", () => { + it("onerror SDK → close client 4006", async () => { const client = new FakeWs(); - const gemini = new FakeWs(); - openGeminiLiveSession(client, { - ...SUJET_OPTS, - geminiFactory: () => gemini, - }); - gemini.emit("open"); + const capture = await openWithMock(client); + capture.callbacks.onopen?.(); - gemini.emit("error", new Error("boom")); + capture.callbacks.onerror?.(new Error("boom")); expect(client.closed).toBe(true); expect(client.closeCode).toBe(4006); }); - it("absence de GEMINI_API_KEY → close client 4005 GEMINI_CONFIG sans appel à la factory", () => { + it("absence de GEMINI_API_KEY → close client 4005 GEMINI_CONFIG sans appel à live.connect", () => { + const originalKey = process.env.GEMINI_API_KEY; delete process.env.GEMINI_API_KEY; + capturedConnect = null; const client = new FakeWs(); - const factory = vi.fn(() => new FakeWs()); + const factory = vi.fn(() => makeFakeClient()); - openGeminiLiveSession(client, { ...SUJET_OPTS, geminiFactory: factory }); + openGeminiLiveSession(client, { + ...SUJET_OPTS, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + clientFactory: factory as any, + }); expect(factory).not.toHaveBeenCalled(); expect(client.closed).toBe(true); expect(client.closeCode).toBe(4005); expect(client.closeReason).toBe("GEMINI_CONFIG"); + + if (originalKey !== undefined) process.env.GEMINI_API_KEY = originalKey; }); }); diff --git a/src/lib/geminiLive.ts b/src/lib/geminiLive.ts index 58e622f..d38556e 100644 --- a/src/lib/geminiLive.ts +++ b/src/lib/geminiLive.ts @@ -1,9 +1,38 @@ -import { WebSocket as NodeWebSocket } from "ws"; +/** + * geminiLive.ts — Sprint 6d. + * + * Migration du WebSocket brut (`wss://generativelanguage.googleapis.com/...`) + * vers le SDK officiel `@google/genai` v1.50.x. Motif : Google a migré les + * clés API vers le mode "Vertex AI Express", incompatible avec l'endpoint WS + * historique (réponse 403 systématique). Le SDK gère l'auth automatiquement + * et accepte les clés Express bound à un service account. + * + * Interface publique (consommée par `routes/t2live.ts`) : + * - openGeminiLiveSession(clientWs, opts) : ouvre une session Live et + * proxifie les messages dans les deux sens entre le client (navigateur) + * et Gemini, accumule les transcripts, gère timeouts + close codes. + * - WebSocketLike : interface minimale pour le client WS (Hono adapter). + * - buildT2SystemPrompt({role, contexte}) : prompt dynamique T2 Live. + * - GEMINI_LIVE_MODEL, T2_SESSION_TIMEOUT_MS, T2_SESSION_WARNING_MS. + * + * Cf. docs/IMPLEMENTATION_T2_LIVE.md §3, docs/Prompt_t2live.md §3. + */ -export const GEMINI_LIVE_URL = - "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"; +import { + GoogleGenAI, + Modality, + StartSensitivity, + EndSensitivity, + type Session, +} from "@google/genai"; -export const GEMINI_LIVE_MODEL = "models/gemini-2.5-flash-native-audio-latest"; +/** + * Modèle Live cible. `gemini-3.1-flash-live-preview` est le choix par défaut + * (Sprint 6d), à valider sur Express Mode via `test-gemini-live.js`. Fallback + * documenté : `gemini-2.0-flash-live-001` (modèle Live garanti sur Express + * d'après la doc Vertex Express). + */ +export const GEMINI_LIVE_MODEL = "gemini-3.1-flash-live-preview"; /** Timeout total session WS T2 Live : 3 min 30 (durée TCF) + marge évaluation. */ export const T2_SESSION_TIMEOUT_MS = 210_000; @@ -36,7 +65,6 @@ Règles à respecter impérativement : /** * Subset minimal d'une WebSocket — compatible avec : * - le wrapper exposé par @hono/node-ws (côté client navigateur) - * - la WebSocket de `ws` (côté Gemini) * - les fakes basés sur EventEmitter dans les tests */ export interface WebSocketLike { @@ -59,34 +87,32 @@ export interface OpenGeminiLiveSessionOptions { timeoutMs?: number; /** Override warning (par défaut T2_SESSION_WARNING_MS). */ warningMs?: number; - /** Injection pour les tests — fabrique de WebSocket vers Gemini. */ - geminiFactory?: (url: string) => WebSocketLike; /** Surcharge la clé API (par défaut : process.env.GEMINI_API_KEY). */ apiKey?: string; + /** + * Injection pour les tests — fabrique de client SDK. Permet de remplacer + * `new GoogleGenAI(...)` par un mock dans les tests sans toucher au code prod. + */ + clientFactory?: (apiKey: string) => GoogleGenAI; } -function buildSetupFrame(systemPrompt: string): string { - return JSON.stringify({ - setup: { - model: GEMINI_LIVE_MODEL, - systemInstruction: { - parts: [{ text: systemPrompt }], - }, - generationConfig: { - responseModalities: ["AUDIO"], - }, - inputAudioTranscription: {}, - outputAudioTranscription: {}, - realtimeInputConfig: { - automaticActivityDetection: { - disabled: false, - startOfSpeechSensitivity: "START_SENSITIVITY_LOW", - endOfSpeechSensitivity: "END_SENSITIVITY_LOW", - silenceDurationMs: 2000, - }, - }, - }, - }); +/** + * Forme minimale d'un message Live retourné par le SDK. On n'exporte pas + * `LiveServerMessage` du SDK pour ne pas coupler les tests à son shape exact. + */ +interface LiveServerMessage { + serverContent?: { + modelTurn?: { + parts?: Array<{ + inlineData?: { data?: string; mimeType?: string }; + }>; + }; + inputTranscription?: { text?: string }; + outputTranscription?: { text?: string }; + interrupted?: boolean; + turnComplete?: boolean; + }; + setupComplete?: unknown; } interface TranscriptEntry { @@ -104,54 +130,6 @@ function reconstructTranscript(entries: TranscriptEntry[]): string { .join("\n"); } -/** - * Tente de parser un message Gemini en JSON pour en extraire les transcripts. - * Retourne null si non-JSON (chunks audio binaires). - */ -function tryParseGeminiMessage(data: unknown): { - inputText?: string; - outputText?: string; -} | null { - let text: string; - if (typeof data === "string") { - text = data; - } else if (data instanceof Buffer) { - // Heuristique : tenter de parser comme JSON UTF-8 ; si ça échoue, c'est binaire. - try { - text = data.toString("utf8"); - if (!text.startsWith("{")) return null; - } catch { - return null; - } - } else if (typeof data === "object" && data !== null && "toString" in data) { - try { - text = (data as { toString: () => string }).toString(); - if (!text.startsWith("{")) return null; - } catch { - return null; - } - } else { - return null; - } - - try { - const parsed = JSON.parse(text) as { - serverContent?: { - inputTranscription?: { text?: string }; - outputTranscription?: { text?: string }; - }; - }; - const sc = parsed.serverContent; - if (!sc) return {}; - return { - inputText: sc.inputTranscription?.text, - outputText: sc.outputTranscription?.text, - }; - } catch { - return null; - } -} - /** * Détecte un signal de fin de session envoyé par le client : `{type:'end'}`. */ @@ -178,19 +156,53 @@ function isEndSignal(data: unknown): boolean { } /** - * Ouvre une session Gemini Live et proxifie les messages + * Parse un message client `{type:'audio', data: base64}` et renvoie le base64 + * si le format est valide, sinon null. + */ +function parseAudioChunk(data: unknown): string | null { + let text: string; + if (typeof data === "string") { + text = data; + } else if (data instanceof Buffer) { + try { + text = data.toString("utf8"); + } catch { + return null; + } + } else { + return null; + } + if (!text.startsWith("{")) return null; + try { + const parsed = JSON.parse(text) as { type?: string; data?: unknown }; + if (parsed.type === "audio" && typeof parsed.data === "string") { + return parsed.data; + } + return null; + } catch { + return null; + } +} + +/** + * Ouvre une session Gemini Live via le SDK et proxifie les messages * dans les deux sens entre le client (navigateur) et Gemini. * - * - À l'open Gemini : envoie le setup frame avec prompt dynamique + VAD - * + inputAudioTranscription + outputAudioTranscription. - * - Forward transparent des frames audio dans les deux directions. - * - Accumule les transcripts (input = candidat, output = examinateur IA). - * - Détecte signal client `{type:'end'}` → déclenche fin de session. - * - Timeout 210 s : warning client à 180 s, fin auto à 210 s. - * - En fin de session : appelle `onSessionEnd(transcript)` puis ferme Gemini. - * Le client WS n'est PAS fermé ici — c'est l'appelant qui décide (envoi du - * rapport puis close 1000). - * - Erreur Gemini → close client 4006 GEMINI_DISCONNECTED. + * - Init : `new GoogleGenAI({ vertexai: true, apiKey })` → mode Vertex Express + * (compatible avec les clés API auto-bound à un service account). + * - Setup config : modèle + responseModalities AUDIO + systemInstruction + * + inputAudioTranscription + outputAudioTranscription + VAD. + * - Forward client → Gemini : parse `{type:'audio', data: base64}` → + * `session.sendRealtimeInput({audio: {data, mimeType: 'audio/pcm;rate=16000'}})`. + * - Forward Gemini → client : `clientWs.send(JSON.stringify(msg))` (le frontend + * parse `serverContent.modelTurn.parts[].inlineData.data`). + * - Accumule input/outputTranscription pour la correction finale. + * - Détecte `{type:'end'}` du client → fin de session. + * - Timer 210 s : warning à 180 s, fin auto à 210 s. + * - En fin : `onSessionEnd(transcript)` puis ferme la session SDK. Le client WS + * n'est PAS fermé ici — c'est l'appelant qui décide (envoi du rapport puis + * close 1000). + * - Erreur SDK / close Gemini → close client 4006 GEMINI_DISCONNECTED. * - GEMINI_API_KEY absente → close client 4005 GEMINI_CONFIG. */ export function openGeminiLiveSession( @@ -211,17 +223,14 @@ export function openGeminiLiveSession( contexte: opts.contexte, }); - const url = `${GEMINI_LIVE_URL}?key=${apiKey}`; - const factory = - opts.geminiFactory ?? - ((u: string) => new NodeWebSocket(u) as unknown as WebSocketLike); - - const geminiWs = factory(url); + const ai = + opts.clientFactory?.(apiKey) ?? new GoogleGenAI({ vertexai: true, apiKey }); const transcriptEntries: TranscriptEntry[] = []; let sessionEnded = false; let warningTimer: ReturnType | null = null; let timeoutTimer: ReturnType | null = null; + let session: Session | null = null; const clearTimers = () => { if (warningTimer !== null) { @@ -238,10 +247,12 @@ export function openGeminiLiveSession( if (sessionEnded) return; sessionEnded = true; clearTimers(); - try { - geminiWs.close(1000); - } catch { - /* ignore */ + if (session) { + try { + session.close(); + } catch { + /* ignore */ + } } if (opts.onSessionEnd) { try { @@ -255,105 +266,153 @@ export function openGeminiLiveSession( } }; - geminiWs.on("open", () => { - console.log("[T2] Gemini WS opened"); - try { - geminiWs.send(buildSetupFrame(systemPrompt)); - console.log("[T2] Setup frame sent"); - - // Démarrer les timers une fois la session Gemini effectivement ouverte. - warningTimer = setTimeout(() => { - if (sessionEnded) return; - try { - clientWs.send( - JSON.stringify({ - type: "warning", - message: "30 secondes restantes", - }), - ); - } catch { - /* ignore */ - } - }, warningMs); - - timeoutTimer = setTimeout(() => { - void endSession(); - }, timeoutMs); - } catch { - try { - clientWs.close(4005, "GEMINI_CONFIG"); - } catch { - /* ignore */ - } + const handleSdkMessage = (msg: LiveServerMessage) => { + // Accumuler transcripts pour la correction finale. + const sc = msg.serverContent; + if (sc?.inputTranscription?.text && sc.inputTranscription.text.length > 0) { + transcriptEntries.push({ + speaker: "candidat", + text: sc.inputTranscription.text, + }); } - }); - - geminiWs.on("message", (data) => { - // Tentative d'extraction des transcripts — si JSON, on accumule ; - // dans tous les cas (JSON ou audio binaire), on forward au client. - const parsed = tryParseGeminiMessage(data); - if (parsed) { - if (parsed.inputText && parsed.inputText.length > 0) { - transcriptEntries.push({ - speaker: "candidat", - text: parsed.inputText, - }); - } - if (parsed.outputText && parsed.outputText.length > 0) { - transcriptEntries.push({ - speaker: "examinateur", - text: parsed.outputText, - }); - } + if ( + sc?.outputTranscription?.text && + sc.outputTranscription.text.length > 0 + ) { + transcriptEntries.push({ + speaker: "examinateur", + text: sc.outputTranscription.text, + }); } + + // Forward verbatim au client. Le frontend parse serverContent.modelTurn. try { - clientWs.send(data); + clientWs.send(JSON.stringify(msg)); } catch { void endSession(); } - }); + }; + // ── Ouverture de la session SDK ────────────────────────────────────── + ai.live + .connect({ + model: GEMINI_LIVE_MODEL, + config: { + responseModalities: [Modality.AUDIO], + systemInstruction: systemPrompt, + inputAudioTranscription: {}, + outputAudioTranscription: {}, + realtimeInputConfig: { + automaticActivityDetection: { + disabled: false, + startOfSpeechSensitivity: StartSensitivity.START_SENSITIVITY_LOW, + endOfSpeechSensitivity: EndSensitivity.END_SENSITIVITY_LOW, + silenceDurationMs: 2000, + }, + }, + }, + callbacks: { + onopen: () => { + console.log("[T2] Session Gemini ouverte (SDK)"); + // Démarrer les timers une fois la session effectivement ouverte. + warningTimer = setTimeout(() => { + if (sessionEnded) return; + try { + clientWs.send( + JSON.stringify({ + type: "warning", + message: "30 secondes restantes", + }), + ); + } catch { + /* ignore */ + } + }, warningMs); + + timeoutTimer = setTimeout(() => { + void endSession(); + }, timeoutMs); + }, + onmessage: (msg: LiveServerMessage) => { + handleSdkMessage(msg); + }, + onerror: (err: unknown) => { + console.log( + "[T2] Erreur SDK :", + err instanceof Error ? err.message : String(err), + ); + if (!sessionEnded) { + clearTimers(); + sessionEnded = true; + try { + clientWs.close(4006, "GEMINI_DISCONNECTED"); + } catch { + /* ignore */ + } + } + }, + onclose: () => { + console.log("[T2] Session Gemini fermée (SDK)"); + if (!sessionEnded) { + clearTimers(); + try { + clientWs.close(4006, "GEMINI_DISCONNECTED"); + } catch { + /* ignore */ + } + } + }, + }, + }) + .then((s: Session) => { + session = s; + }) + .catch((err: unknown) => { + console.log( + "[T2] live.connect a échoué :", + err instanceof Error ? err.message : String(err), + ); + sessionEnded = true; + clearTimers(); + try { + clientWs.close(4006, "GEMINI_DISCONNECTED"); + } catch { + /* ignore */ + } + }); + + // ── Forward client → Gemini ────────────────────────────────────────── clientWs.on("message", (data) => { if (isEndSignal(data)) { void endSession(); return; } - try { - geminiWs.send(data); - } catch { - void endSession(); - } - }); - - geminiWs.on("close", () => { - console.log("[T2] Gemini closed"); - if (!sessionEnded) { - clearTimers(); + const audioBase64 = parseAudioChunk(data); + if (audioBase64 !== null && session !== null && !sessionEnded) { try { - clientWs.close(4006, "GEMINI_DISCONNECTED"); - } catch { - /* ignore */ + session.sendRealtimeInput({ + audio: { + data: audioBase64, + mimeType: "audio/pcm;rate=16000", + }, + }); + } catch (err) { + console.log( + "[T2] sendRealtimeInput a échoué :", + err instanceof Error ? err.message : String(err), + ); + void endSession(); } } + // Tout autre message client est ignoré (ex: ping keep-alive frontend). }); clientWs.on("close", () => { clearTimers(); sessionEnded = true; - try { - geminiWs.close(1000); - } catch { - /* ignore */ - } - }); - - geminiWs.on("error", (err) => { - console.log("[T2] Gemini error:", (err as Error)?.message); - if (!sessionEnded) { - clearTimers(); - sessionEnded = true; + if (session) { try { - clientWs.close(4006, "GEMINI_DISCONNECTED"); + session.close(); } catch { /* ignore */ } @@ -363,10 +422,12 @@ export function openGeminiLiveSession( clientWs.on("error", () => { clearTimers(); sessionEnded = true; - try { - geminiWs.close(1011); - } catch { - /* ignore */ + if (session) { + try { + session.close(); + } catch { + /* ignore */ + } } }); } diff --git a/src/routes/t2live.ts b/src/routes/t2live.ts index 474b7a1..57eafbd 100644 --- a/src/routes/t2live.ts +++ b/src/routes/t2live.ts @@ -230,8 +230,8 @@ export async function runT2LiveCorrection(args: { } export interface CreateT2LiveRoutesOptions { - /** Injection pour les tests : fabrique de WebSocket vers Gemini. */ - geminiFactory?: OpenGeminiLiveSessionOptions["geminiFactory"]; + /** Injection pour les tests : fabrique de client SDK Gemini (Sprint 6d). */ + clientFactory?: OpenGeminiLiveSessionOptions["clientFactory"]; /** Injection pour les tests : override timeout/warning. */ timeoutMs?: number; warningMs?: number; @@ -313,7 +313,7 @@ export default function createT2LiveRoutes( openGeminiLiveSession(adapter, { role: sujetNonNull.role!, contexte: sujetNonNull.contexte!, - geminiFactory: opts.geminiFactory, + clientFactory: opts.clientFactory, timeoutMs: opts.timeoutMs, warningMs: opts.warningMs, onSessionEnd: async (transcript) => { diff --git a/test-gemini-live.js b/test-gemini-live.js new file mode 100644 index 0000000..93771f0 --- /dev/null +++ b/test-gemini-live.js @@ -0,0 +1,150 @@ +// test-gemini-live.js — Sprint 6d : debug du setup frame Gemini Live via SDK. +// +// Usage : +// node --env-file=.env test-gemini-live.js minimal +// node --env-file=.env test-gemini-live.js +system +// node --env-file=.env test-gemini-live.js +transcription +// node --env-file=.env test-gemini-live.js +vad +// +// Chaque mode part du `minimal` qui doit fonctionner avec une clé Express +// Mode et ajoute UN champ. Si le mode reçoit `setupComplete` → le champ est +// accepté. Si l'ouverture échoue → c'est ce champ qui pose problème. +// +// Migration Sprint 6d : passage du WebSocket brut au SDK officiel +// `@google/genai` qui gère l'auth Express Mode automatiquement. + +import { + GoogleGenAI, + Modality, + StartSensitivity, + EndSensitivity, +} from "@google/genai"; + +const MODES = ["minimal", "+system", "+transcription", "+vad"]; +const mode = process.argv[2] ?? "minimal"; +if (!MODES.includes(mode)) { + console.error( + `❌ Mode inconnu : "${mode}". Modes valides : ${MODES.join(", ")}`, + ); + process.exit(1); +} + +const KEY = process.env.GEMINI_API_KEY; +if (!KEY) { + console.error("❌ GEMINI_API_KEY manquante dans l'env"); + process.exit(1); +} + +// Modèle par défaut Sprint 6d. Fallback documenté : `gemini-2.0-flash-live-001`. +const MODEL = "gemini-3.1-flash-live-preview"; + +const SAMPLE_PROMPT = + "Tu joues le rôle d'un bailleur. Tu réponds uniquement en français. " + + "Tu attends que ton interlocuteur s'adresse à toi avant de parler."; + +function buildConfig(mode) { + // Base minimal — équivalent au mode `minimal` qui doit fonctionner. + const config = { + responseModalities: [Modality.AUDIO], + }; + + if (mode === "+system") { + config.systemInstruction = SAMPLE_PROMPT; + } + + if (mode === "+transcription") { + config.inputAudioTranscription = {}; + config.outputAudioTranscription = {}; + } + + if (mode === "+vad") { + config.realtimeInputConfig = { + automaticActivityDetection: { + disabled: false, + startOfSpeechSensitivity: StartSensitivity.START_SENSITIVITY_LOW, + endOfSpeechSensitivity: EndSensitivity.END_SENSITIVITY_LOW, + silenceDurationMs: 2000, + }, + }; + } + + return config; +} + +const ai = new GoogleGenAI({ vertexai: true, apiKey: KEY }); + +console.log(`→ Mode : ${mode}`); +console.log(`→ Modèle : ${MODEL}`); +console.log("→ Connexion à Gemini Live (via SDK)…"); + +let setupCompleteReceived = false; +let resolved = false; + +const config = buildConfig(mode); +console.log("→ Config envoyée :"); +console.log(JSON.stringify(config, null, 2)); + +const timeoutId = setTimeout(() => { + if (!resolved) { + console.log("⏱ Timeout 15 s — pas de setupComplete reçu."); + process.exit(setupCompleteReceived ? 0 : 1); + } +}, 15000); + +try { + const session = await ai.live.connect({ + model: MODEL, + config, + callbacks: { + onopen: () => { + console.log("✅ Connexion ouverte"); + }, + onmessage: (msg) => { + // Compat : selon la version du SDK, setupComplete arrive soit comme + // propriété directe, soit dans serverContent. On loggue tout. + console.log("📨 Message reçu :", JSON.stringify(msg).slice(0, 600)); + if (msg.setupComplete || msg?.serverContent?.setupComplete) { + setupCompleteReceived = true; + resolved = true; + console.log( + `\n🎉 [${mode}] ACCEPTÉ — setupComplete reçu (modèle ${MODEL}).`, + ); + clearTimeout(timeoutId); + try { + session.close(); + } catch { + /* ignore */ + } + process.exit(0); + } + }, + onerror: (err) => { + console.log("❌ Erreur :", err?.message ?? err); + }, + onclose: (evt) => { + console.log( + `🔒 Fermeture${evt?.code ? ` — code ${evt.code}` : ""}${evt?.reason ? ` reason: ${evt.reason}` : ""}`, + ); + if (!setupCompleteReceived) { + console.log(`\n⚠ [${mode}] REJETÉ — fermeture avant setupComplete.`); + console.log( + "→ Le ou les champs ajoutés par ce mode ne sont pas acceptés.", + ); + } + resolved = true; + clearTimeout(timeoutId); + process.exit(setupCompleteReceived ? 0 : 1); + }, + }, + }); + // Conserver la session vivante jusqu'au timeout/setupComplete. + void session; +} catch (err) { + resolved = true; + clearTimeout(timeoutId); + console.log( + "❌ live.connect a échoué :", + err instanceof Error ? err.message : String(err), + ); + process.exit(1); +}