From e4a4faebc75af80b4e111692b455021f80b04d2f Mon Sep 17 00:00:00 2001 From: Mehdi Chaabouni Date: Fri, 14 Feb 2025 16:58:46 -0500 Subject: [PATCH 1/9] feat: add `audioData` option to `KurtMessage` Now it is possible to send inline audio data to multi-modal LLMs. --- packages/kurt-cache/src/KurtCache.ts | 2 ++ packages/kurt/src/Kurt.ts | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/packages/kurt-cache/src/KurtCache.ts b/packages/kurt-cache/src/KurtCache.ts index 29ed8bc..0ec3503 100644 --- a/packages/kurt-cache/src/KurtCache.ts +++ b/packages/kurt-cache/src/KurtCache.ts @@ -319,6 +319,8 @@ function hashMessages(digest: Hash, messages: KurtMessage[]): Hash { mayHash(digest, "text", m.text) mayHash(digest, "imageDataMimeType", m.imageData?.mimeType) mayHash(digest, "imageDataBase64Data", m.imageData?.base64Data) + mayHash(digest, "audioDataMimeType", m.audioData?.mimeType) + mayHash(digest, "audioDataBase64Data", m.audioData?.base64Data) if (m.toolCall) { mayHash(digest, "toolName", m.toolCall.name) mayHash(digest, "toolArgs", JSON.stringify(m.toolCall.args)) diff --git a/packages/kurt/src/Kurt.ts b/packages/kurt/src/Kurt.ts index 54570e9..5c35da2 100644 --- a/packages/kurt/src/Kurt.ts +++ b/packages/kurt/src/Kurt.ts @@ -218,6 +218,20 @@ export type KurtMessage = { base64Data: string } + audioData: { + /** + * The IANA standard MIME type of the inline audio data. + * + * Not all MIME types are supported by all LLM providers. + * "audio/mpeg" is the most commonly supported. + * Check your LLM provider's documentation for the right list. + */ + mimeType: string + + /** Base64-encoded audio data, as a string. */ + base64Data: string + } + /** * When present, this is a tool call message, with structured data input * in the `args` object, and structured data output in the `result` object. 
From 6250511344c9d23a47f630fd19948788056b848c Mon Sep 17 00:00:00 2001 From: Mehdi Chaabouni Date: Sat, 15 Feb 2025 08:18:31 -0500 Subject: [PATCH 2/9] feat: add support for `audioData` messages in `KurtOpenAI` and `KurtVertexAI` The `audioData` is only supported in `KurtVertexAI`. `KurtOpenAI` will throw an error if audioData is included in a `KurtMessage`. --- .../spec/generateNaturalLanguage.spec.ts | 23 +++++++ packages/kurt-open-ai/src/KurtOpenAI.ts | 4 +- .../kurt-vertex-ai/spec/data/HelloWorld.mp3 | Bin 0 -> 13244 bytes .../spec/generateStructuredData.spec.ts | 30 ++++++++- ...ta_transcribes_a_base64-encoded_audio.yaml | 63 ++++++++++++++++++ packages/kurt-vertex-ai/src/KurtVertexAI.ts | 24 +++---- 6 files changed, 129 insertions(+), 15 deletions(-) create mode 100644 packages/kurt-vertex-ai/spec/data/HelloWorld.mp3 create mode 100644 packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_transcribes_a_base64-encoded_audio.yaml diff --git a/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts b/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts index 3b76366..8a6ab00 100644 --- a/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts +++ b/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts @@ -68,4 +68,27 @@ describe("KurtOpenAI generateNaturalLanguage", () => { ) expect(result.text).toEqual("Heart eyes") }) + + test("throws an error when a message includes inline audio data", async () => { + await snapshotAndMockWithError( + "gpt-4o-2024-05-13", + (kurt) => + kurt.generateNaturalLanguage({ + prompt: "Transcribe this audio file.", + extraMessages: [ + { + role: "user", + audioData: { + mimeType: "audio/mpeg", + base64Data: "DUMMYDATA", + }, + }, + ], + }), + (errorAny) => { + expect(errorAny).toBeInstanceOf(Error) + expect(errorAny.message).toEqual("Unsupported audio data for OpenAI") + } + ) + }) }) diff --git a/packages/kurt-open-ai/src/KurtOpenAI.ts b/packages/kurt-open-ai/src/KurtOpenAI.ts index 
ba43d8b..f5e4a0e 100644 --- a/packages/kurt-open-ai/src/KurtOpenAI.ts +++ b/packages/kurt-open-ai/src/KurtOpenAI.ts @@ -247,7 +247,9 @@ function toOpenAIMessages(messages: KurtMessage[]): OpenAIMessage[] { } for (const [messageIndex, message] of messages.entries()) { - const { text, toolCall, imageData } = message + const { text, toolCall, imageData, audioData } = message + if (audioData) throw new Error("Unsupported audio data for OpenAI") // TODO: Use a subclass of KurtError + if (text) { const role = openAIRoleMapping[message.role] diff --git a/packages/kurt-vertex-ai/spec/data/HelloWorld.mp3 b/packages/kurt-vertex-ai/spec/data/HelloWorld.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c4f149af5cb0e6cb52d0aa38387896f2d5100ea7 GIT binary patch literal 13244 zcmeI&2T&Bvz9{h7T`~(SIY`(gXO|pBvSgPiAjp!FAo&vnT{0^<2NB5`B_kk`kt7mT zkSrO=AR@v(_)gubSM{oHojP^zef8>{-rAj+ou2Ccb@%ji{cAK;#qfaZ#BOA0sCKm_ z2LOnUwV#8ei15`1iTwNIKYnoMb{+rQ;XigY-5nlX?NMBPB>_Mz0U#zLqoSgxzi|T& z=i=iN78Vnel9HEKQc}~>x_j5q(A3n@(%Ra=)%DRMA0L1JprDY5i0J5qgp`!b%$%J3 zl9I~G>gtAumX`PLyZifxhsVY~f1a6HSXkNE*x5Ne{Q2|z9CuyvRUK`w>L@0BeefTN zlK}mXU9p5EohaG=`1@bnUU_=}$WScjJ_zFgisH2D5TO86Rf!PP&jNy;z25a>XJ;pZ z1kH8KbzQX|4H06lJ2?3LMpObEO#kKfYpfp6s>PV-tlZ`9A0hy|A$01oJbSFlivd#< z?;R^MN5HCoBhU2UzFvzs4CInw_t55;lOU}C_6Mn6^qPI1u`W)rpyN`+u)Uhy63q7_ zDyK}SPLxdo&qkGwxy+|m&nrPItwn!Duuk_gooB45xWtD_#di$d=KMF|?uxF1QTFl8 z4`FW?L3C>H^Pk<5#gRXZx0u&93PkqfS#&E5V8muMH~8e2i6R9;VtrE(uxl?lWLSpx z5)>8)a>;`Oi09HOF0Y#ljErVgCd+pt)?yz2OuV@ud!H%^@lIo}Y(-1+(DS{pY81Nf zg|{vPa?Ce|J;jLMEf+I0SiFK@D)TmNg@H^1(0_47o4C|i`_^iFI?stBZ9f~;(^}5V zDr^($WPTy^g{yx!F`i1v`R8X?YUi{&p z#`&LF1b>o6Qy6z0mGkRfG(a9ybSGw{WG2ug+<3%ZY6i_X2@T*3~KV7Al(dNSw;E z_zpM0wjO1;DIYR1CWelN<56p@9m7fxFN#$&2esztpKOq_n`7{)?RN!Ebp3m>tO>Ju z%6OSMZJxqY>zQ%ZndAcYJY(p(v4Op;cUh0vlOOcWZMT{?3zbc{nB5XL0a~(A$RRcSBX1 zo6NQMCS+)g;L7Vl{J6-f0~yoL?T8NxA6TqtaPe5NKP;pZ@;TIaSm;RZN?dTBR5pHD zl`^sKbiy31Jk{1x!l8E)b1{WWNi><6bz 
zE>+odPvvTu#ptqzEOvBir+0V#APG};xsmynJG}D|znXg7vCw5oG>ICXhIa|M@|wkh z3uPd+3c~bFm;&K0)~@!!nO+vqz9I1D~N{S|S!_q_oTM_=$$;>Is(kp<-L z0-IO!2TZmy`C(s=k6O+?R;iOfHeu8*Ud!dQM6N{d+5>gW!+Bg!H0W~q2(;f=pi zb-q&C%ZPW0L!cqFbw9{2nX$v**!p@1Oo0~f-?y7YZpHCKk`v+_! zIMSDiIoOc5yF%CAYzsR4WpnY(XMFI9%yz&|kR$ForPL<%MhD`4wSApCr=LoqA_Bnr zT7VWX78CXH+DizK#nM{6OTkb!a|oWKWCrNV7H$B+N-f27`Vi+2dkr_g@#ezi^BUFh zVsf#{UL-MpN#Jt@l}@^ozsA5M%fOGcsnU}!>DVEWteifH)b_`8g94uyAV!Vn#$ zXp>OY;$Tz&5f@Y=$1QMV8PAPCd4;6l(WCW72XE7nus?-|gNSfEW(}eX{pEzmAC8rD z&i*{TAjMtOI#4^2BJ&-qBt{q?Ryy@AzxHB>Ur**t0QP&r-{YAOh|Q?8-o3?#t%sMN zGzx9TrDRKBIGt$Fs2AFXfBCMw6!*%j;`KlFZxCT-W-nZ{moPf&;#n;)2{S>}6LT68^nR+o)Z^;Hm_wtc9C!PhT&uV?kH6IGA-*B4wwGEg(mY6w>!x?t?e132_uJO+ntw{<#Ys} z8`FaJtS&#|E_deu%4oHgC`+q9^&EyL0lt z4b*0B&<kl%otcG4UOay=vMDn}i4%DXh)U-cyx?Z6%)6ch@~D*x=%DVw%FyKH-a zKvU_%eRGX6fl^U6k^1~QJslaHlPVN_pBHeOFUQtdi$0r7@1c6k$3mx;FJRz~I;hs@ z8ly9E8BcT4I=4M`pCsmWQq>o1oEeHV9xCKqB@^M)CS`7%&i_Qq{{4bQT72I92{AE= znhCYK{n;Fa_%{ej()CD3Z8r3Zj3TS<9YbHMI=S=Xg(B1N-}kb8o7=~q^=AUTnX3Tc z(}4%dC8$BVj4(^BNeHP&8xNkXCW6>378!Ad1BcslDu5hM)C4m0;_r!*GX(jP&0 zpG=$oW%I+=It2;AsHKxPybY%fz-0kw?-)HZG*7N$79|#!3|45!dJl6GV z#`nzj?>`MXf z{+ZDHxXJ~Pka7mm&?C3mq=)Dd3zJ9+8Stn}f0_`c5zmuJQZi~9;;WaUQQ4xst8Nau z4DX*ZWucYmn>4gn*bH~-&V>N+$YaD#^LA;Zq$XZi&)V5)rdi90prm(l)Oq9W%Za!k zMjXyV@yAN5`qkPN;0VCp;8TQmX47EczN$2wh$dF^2?DTL1-t3YE&Xq#wDHnpN?(l$ zw1M}Ny5lO|?!EHTx0Qp;RGvBz7g1S>l|5DBMwpKrnzIj@>5eUAJ)lOti6C+Vb#yd`%X*@GdE1Ii4rCR z02ru!q*1@woTt-)jEe1$ZxJmkGknhJe#=vyN0Sp7%8W+|)@H93 zc4}osFd19D!QqfyBVH}?^G6)PrBOWS$I`q8BQ_vNH<|}e+Xr=mMkA{L01W`*1hsM- z9=QqkN(tx-2{<&ZzZdeD%f)*f&uOzzyknCr{{0~-aYb!1iO`TeygZF>_zMn4&26lC z?PU$oo+2~WX(IzUq;9#T$CeehmuNb7D!JfylM{3mjh`;ee3a@|89ZgX_^a(`_*|sd z6bx2Zo{QwBvlJU%PI~)|K)GD!7aNmYWdDe1b-J5(IG9HBspk`&pWuWNnd#Bh2r~;2 zba3L~A?}yI7MmusOcSXWCFKFT>g@}oFso25^1XcH-~p}V@0!McYmlVi=)|}YjV#Jz z6dz`z=HbVbNKBF{!QiypTd);p*Zjde)7psybzFUT%Icw%Kw;6V;%e}a+wJ^vmITW* z;$TL)U~4P4V1)F08J|=X@T-D^^Rwj{)`s}ndozS|vCdEjD;?~c$6uEETt-Dv2ZG5K 
zB+!!|qglC0oUos)5NTY>aptSnJYgU^9=ONx_K#t`o>GPhDEL?P=qqjg<2X6DCuQ{u z^MeZC8Zz&n`)b4dg-I4oeL^SS|9;8(*<7}!qD@cA=ZD*YD zRu3u~XQDVyQb-U-@Z8!woX0xr4%bohm?q&zHn^qYSLtx}d^r)4xa-}+)GC` zh$1tZ(ABRVAGaPFbk}(Nf{~=4+O5vjcId3x9bN`O83#V`1Hhyyl+9XwEs6?_*%fd; zMStp4dszT77lxG8qbnQhB`fE-g_hjOof&nGbDonXCB!)oPI7tGiLHP8gsVDvTHs5N z8sxhFRgkv==o)^`ene|ZK^&3MfMP_y;Q4UN&IAb43JFpBiJ!*MQyL(7kj)reo-$OU zmX^*KH{U9E{G=|tePO#vPAOcDx%%<+EgARmo;ZWCkI!&&*Iqra4zx~J=$3%eU#xt- zMbDRVCR+LFq@N0@SQ8uOZTyA*DK(R!vHjdVju`iw1$>JyXmILk>Yz`i<^Vc))yTl} z`LMe__>5F;DdTB|`Rp-s(>WF7?a;}wL$;;w+eJl8BxKdWFlHT1LGw)g49E0m>keL| z94(A`rupOyfHZ-)-<_iSQFW4xY&u$>*dpU)kcc3;eFM30dO4gF^}O5^osnc+49PV&q9e!H zxfB=(T?}Q_W$eA%;=1-)fJuLy=#oi+y|z2H4f;ODG~*6$3R%jF19yUgKC$7>876(R=k}E;flY(%lTTl8 z8?!geKDTV+*2MJ$dkowBh!O*?I^-ib7X;+?L8ERMLD3D^e4vAVNB?978Ts^T>2+dQ;qE=Oc z4Pj!{uwsh1TQ`5hvUbaR>2U?j-gTBV5<>DFYEj0gGY|j!w6pWHJT})^k!dlqqeiFwKJ=K4ZAb}V0QCg1hLYSr<`k*t zA`iYp({;Y{6#q41@wEe0uu7_i-ra;F6S=7G*Dt|2*gES1z;5M-80TBPmQ ztGU|1TEs-7*z7I&2}>ElHi?x+T3%;9AMH$|4dH=FJzEmoanLE1qr!e8#`y5RA zc8Ygo(UJIJYT@);jA#6V7{#z67}A*tf)~b(?yaY=S?+X15 zGND6U+qcO^X+|uMZ=A4p0=kEShvlStTit17qOEMNQwr==YmHvWG;@bNHOxphmPQK`feT}f^+F|f)cz)mOs7{7< z^XcZ7%i7&$7T#@PDgOP48^MIk6l;NrC9v3H*=7g9P;TnTJ_yYRcE%caN)0j(UR7oy zUSgbK__XW`4E+d6%K`dwIiA>l>+j^=gnr+Hnrzj|IKO=Q2325#R*%m4?U2+Pfww&5}@^=C2bwr?p${5O#|Qcg~$k3`)W?J^9&3l&_)jC zm+zg~)qUiha(x(yL%SdBs9?_epfnUDLA(yxmKcv@ zX*A|B3t!~DdIi^I>FO24i){X#eu8$^7(32iSB};ij3n?OojO zdNa73Vhl4FeG&cAU-mPXyb4`$$&RUSTttv7?tA=pgP=~3ILE!TSH{UO8jz0Ci_h>I zSQ08HZtIZFO1o!T-Kw(;3P#nj!XzO)=0i6q@#CwwUmvliWNG6OtTcUN$;@6jnG44^ z>IN$Mj&==qTaJFhO-TscEcKIKtd0i7*nZORpSGe=WDLR8Gijnwcs zsp#$6+Aml1L-CZu`i+PN?NHAd?xzWkfhzVEP1O)_zv6MrNp$U5pWi$5#qdth0sy}b zCWY|$%-D}+p3^y^tWNn!515Hj9BYSBIjk81b!dQ82yRs~-hI9oH{L&yA0YGU{-bP-M4@4q1 z%ktRK{hTO}_G1uAfHW9}Wy(f>q6)u(DZguK!;Eh$}m26tV% znRfPnQPd?W=?ZXL{WOMSfDU#w5C#!v!~u|L5bK=I$aHt%dKO_prlq`(X_WzIMG)FB zWC8XPT4@)RK1;}z`5dN0Y29f{Tq(GyZtNvIu9!fbc^m%KK=(gfgR2O6 zD1S|+geVw@rarpYa^#1M#NiV3H3|AOF!`Pu8Nzff7M`td5=zQ*)}GzQ 
z;pR+ihYiDez@POTkINF4-iHrTCKfs|#dKke19}il8c?{+*y}IT zMns%FP9dw@zNy-y3=qIV;aJn;p7Qi<+-V7u6f>e!ncTRip18Y2&1Xcfy-_<_@Al=m zxkX+VehzZvZQ$Z(DGUg1>|z@Pyh_RxMMN|)?5V51u*4TJ0BEkg6=0|cvBnO(Ci7#y zQ^UK$(qdC-JpEkdUWOqV9h2pkx!GBaW?RjYQs(>&Dk-wfyGtjhxIYiA^}V8)_?>PO z5*bc9B??Z%_U-A8zdHCy&k4T5`LlAvJC??RrOtat0k>r^h6D_ZDK0|*6*RTF!uOG? z@GU`*axMsuNFi#u!m7ikk*%NNMizG4aylZ z13YwpHy&z{nXmUjI}TSG|NFuPp;1@ff{Tj@TZQ0xfT{s`wqGyUGptEC*MoQ2hVc$r z=B)t&#o&G{n4qsf`S1DaH5l4+q^-*cODth^A}dOhh6jB+q0TtpiE>o!Df~H=dvV0m zTYvZIZCsI2bd*i@9W!H2r^PV3$Q~Mx7;lwl=e^H99Uq)_iN?+2a1jx*S1~dG8JbCj zvGDTnKpp}{c1<7;4}B&WmuG--t~Cn(@$Y<^Myu`yuh`jqJeGj)DYwX*&A%YjMBaDy zZbABpwjPSrzV5`U>)fdUuL{_%(XxGe5T}!nM+96dk&pn>6dic(87G>1UX3jEc@GM z@*-w|Z9h8uP$;v8faukz$9d>QX!*R$Ts2+n2u*O*NV7weDAGYfekovLh|(NPp?ggE z4xE68M{mEcASNn2cYH%<;2`h|?$d5kR5BGpaF%mVBK)u4ch3@!)JeN23<)c{K6x>d zgcFM76G#SJ6>v76`UdAqd+lunL-{DRs|wvTpRtzO7bEhHOIt)}=Gm{JreEtpz`gF8 zQJO9#LIQLbRksX?o4wjEIW?&>W#WtV7ZN{*0Vb_y4QMyowZ(2c+n9+M! zfW*>1&-!Dvi;}*bH~UqIKT18=y>}GpW}Qasa&-Ii{%WOzLvujSR7+akbkqzks!Cmu zo{3*tn(uM?LtAMk_C3k!psFqQCdX|W&D6Y}zMq7JISvm3lLWocmE`J}M=2?kIRyp%8YHrBfga(@{3D3kT|x;d$g zx9EAi-{$-_nHsZQCWDU=09Ux-*0zHJz-VHb%4_fDReX|C%Iy}#AP4(;O-WtH>wy;k zuaXA)TOj9sd5Glr>W+<)Ylv}_+2X^nIFaBC#k^!r)NOKMrx<9A=q($hfj!GtNENK? 
z_Gq0>z$bA8)Z_7DhmQyo#c1~-7~?Ko!F2HAW}nF&yRb^MW|gQSHuSB2yuLwuhf?I39{vg3hGc%-R1$z3@bX?b5A4y3n zINfbSofl+r?VSL0JaBh@kkt@-!W{z-vKUbZ#MP!Z=}RG%Zv)4e(37gGefJ=nsoZ0X zMpRkW%KA$k3AyahrX`ErG7kFMBS-k{Re^!0kn_roolUxE*ZP3NuKdbBa~4fH~>)6*7iMc4`hgOlocu6H$X&Q9Ck#7 z^Gcf3*KS9n?!Oe%7FXfMHz=;bBgwO`Rqg5Kyo-m8&OpZvi%v=kx*TAO*XsY+KUnM=7reC)3raB5-Q(}nO&{5|i!MtKL&(_~kTknL^ z`aIv0(;MXEQj=+<<W@_C2KV$j5r^;k*=AV$YvjD=qx?*I@w5oa}+ah+Z7J z{UH`P@GNz0|A?E>nK`VOj5D*)O8j$=Znm6WzL>&Rc3Fx;c}DtQ&p8ukjtAtZZqWra zayDhMtC0@+^{o}mi$sQh3Xl0RKVPk#^4ZF^cD534-n->PBmb=Idl;Z7UBBvVPG^%p zIpxYS5=jUPnj004I-2^@iKj%H$4_G=dUHX1>hNP!x|Josp^2fJ`2H3Q41frK|V{r0seZATfAt%ska&O8Oj2ct=oRO1&suPW8Rf zc>Wl2n5avnjFn-qJWV~x7y82YE%#tY$Gi*^s0Yr zz-?HP<8`)b!7JxA^r!aBo+P=umKnPJ!J!Wns`sF}`!D%cfuepsh16gmD*XBEjzqaq zX>vY0gww|ErfYRzZgq@%>|@(Iesbf8K{lhIh^Mo7YOY_%wI^HFs>iGi;6|oh2>V>- z&RzHK2SUbYFwU4TIypQ^=eOQ!<;hRf6CzYV)loEGQ8W>HMP;dmTdrRGPq=2wfheM5 zo~0N#XqzfC|M|F42${R8qWcftaBjzYUcr~nFP0Hh*WMir*-iis0`&rQRI~M4$1v6> zeajbEVbkqSd}vy35>Yr|lc4MuQ@C==7NyR0_Au;FUa}QX&;t$=<8iUlB-$^$rNmk=k_I{b(ny`|Txa&NMGr29Hw$ESfzA zpeqZM+Qj5xJkZkc`0n@&k>+U0&O59zYMewwhYY;v;4bD!c;6lUM)B7waWgi%E-Ay{ z-4}MYl%FFyg{QB*+W;AtKL`=_6x2{|wRrWunc>ccl#2Y6evXu-&N)kq;VR4YIH4nh*8_P5MoV+DL7`9L zHhuFxD7$M68b|FuZo=amw8{v~6VUwNN6IijUY$@YtCk#*+4LOh!(OXa42UuOX6R5JPI!`7EAFmUvcK29!OK&Is3bR0o^ID%RD?Z zai!40q6k@rAsay4J{svg91}xJr-oLo)pyid2|RW~C2SFo>o| z&sQHAKM<}focCklCb4^ta~O(#ru)mP>vgR6vp@8aC@H>>5Pwr%G`dT^GB>n^Z7^bp zxy=+Q?vF-wh87HVDuKz8VD!3Cf)I~@*-sUk9h0Za}3l6=L8WKT3#{T_Eu;4?Or^RqSTNOJaV=0u*YvI&EB?(=sz^I%&5{)(p3|?eqGY|KS_UkgUR(r zF7smC2J(wuVMHjD{*(Ow7fy+qbYcG~%Rf`MzrBzVO_yJw3}5Wa(ythi*vJ3Oh5xg8 z{!6_nkPZznKn4JCLO>CO0r383E&ohu|E1m}NCyBayn1e3Ef>Wxz`wo!Ob`6`=J|L3 o{)_E { test("says hello (response format 1)", async () => { @@ -108,4 +109,31 @@ describe("KurtVertexAI generateStructuredData", () => { } ) }) + + test("transcribes a base64-encoded audio", async () => { + const result = await snapshotAndMock((kurt) => + kurt.generateStructuredData({ + prompt: "Transcribe this audio file.", + 
extraMessages: [ + { + role: "user", + audioData: { + mimeType: "audio/mpeg", + base64Data: fs.readFileSync("spec/data/HelloWorld.mp3", { + encoding: "base64", + }), + }, + }, + ], + schema: z + .object({ + transcription: z + .string() + .describe("The transcription of the audio"), + }) + .describe("Result of transcribing an audio file"), + }) + ) + expect(result.data).toEqual({ transcription: "Hello world" }) + }) }) diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_transcribes_a_base64-encoded_audio.yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_transcribes_a_base64-encoded_audio.yaml new file mode 100644 index 0000000..fc93dfb --- /dev/null +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_transcribes_a_base64-encoded_audio.yaml @@ -0,0 +1,63 @@ +step1Request: + generationConfig: + maxOutputTokens: 4096 + temperature: 0.5 + topP: 0.95 + contents: + - role: user + parts: + - text: Transcribe this audio file. 
+ - role: user + parts: + - inlineData: + mimeType: audio/mpeg + data: SUQzBAAAAAAAIlRTU0UAAAAOAAADTGF2ZjYxLjEuMTAwAAAAAAAAAAAAAAD/+2TAAAAAAAAAAAAAAAAAAAAAAABJbmZvAAAADwAAADYAADOQAAkNDRISFxcbGyAgJSkpLi4zMzc3PDxBQUVKSk9PU1NYWF1dYWFma2tvb3R0eXl9fYKGhouLkJCUlJmZnp6ip6esrLCwtbW6ur6+w8jIzMzR0dbW2trf3+To6O3t8vL29vv7/wAAAABMYXZjNjEuMy4AAAAAAAAAAAAAAAAkBmAAAAAAAAAzkOFMiDoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/+2TEAAPLjSTwAYBmAYgkX8AxDwBERAghE396fX+u7nYiIiIIA33dwN3CAAAiFAgh3cOBgb7oiJAgAhfXP66JTiflXNEy3Dv+7vwIAInoLfdw4tz0RCiAAg+m7/RCHIYQsPow+U4gdXLg/4AD/hju8mL0JAaWoxD8MEalriLeGbOOQKPA/jFTv7AiXQIp7IieqS2zMh81BGJEFhypdMVOcpBKlrpR5CyzTdYWcY0yNDXHrEC8GMNaKi8gbkBryohlj7lsELjgARZFIPv2w9Smh+xV6hzm6KMx8I8dTatSEAlZsRspPOIIhyuCjXWUIRD/+2TEDgPMOSj+AYR8AWqhIAAhijhDaq65o1RU3EPUqu6G5ox49pEv3zpbEpIhvEjvqeRKSfJxvi5EiGDGm3NNGDDRdYwilFQqbaCA2sqm5CEaqXNYuxABWHpgdyWLVmqxYXWySM/7nogLv/nQTkrimZqkvTPBHJst1yXIzJGPEkFp9tYQlcHZPoVyI2wM/DJLvFQktMImCobgi4iXn0J2mAGTaKRVotYdLPYNiw9VwvOqKk39tQNeq8ORmJSZkBSQG28iqFkMmPGCxENoYU1M4gZHNSQYmYyZIXv9ypFBzIZFJzP6KBEWkI/pLJzUyJX/+2TEHQGL3Sr+AYRuQWAtoBQwj1hqNEOlo+f+/aILDRdb8y0RNc4BW0pIm5hzMqUYiSiSlJNbZPVSgFvJvK9u1O1ZsZTf3LhraJk9UIsV8zQScaZHJgbrThGFWCkwkYyMFhQjb7uNZBjkfShQHkmeUOZneZNSlO6eUs+9+qv/MufjkzpWHFqU0qlaFaE+fmHa0h7ak6MioVUM51VSZTpzqcQze+wXub09uemkQlYlO9XlMsiOmeLU7+tj0v/zN69UpKTPTc7mrGFN2sCBjC0ISo+0WWa/9w9Igo91W7y7otDx71zxnwdzYif6ta29bTn/+2TELwPL0Qb+AYRqCXilX8AwjMgmZwUQhcngq7VqcORl8qQWLXTySGykZw5rCaP7kqnS/q2U0/Bo9RyLQtW9uqcjTi+A/c6VyORNARTssz65PSXBgwmNzjiwooS4EImZMmswkrbIApTU4a1iw9WgSFmMTR6C4MDBS5fDwuwKhEJqG5m4JoXBbypFRo70Lf6UiwpFBEhzpw5C5lzfahF8E0MhWK+EPgfjJfJ/nXBu2HIqL5yh6Mb1evKB+/f4hv7v47Uhjhej9jUb+BMx3zDu3x9yuceAnEIO8sbEdEWe8IlBwQY6kNBkj0pq98Q1fE//+2TEPgAUVS0CFPeAApakqbc1kALpW94d9018T53m/x/IkkOLi6BdyhjmT9C6kuF33vvKF1bz+FzfrHb2M1rxenU6jQ2HUisARl0MTQ9WajpGkU0LNmPLq4fF1W/Fg5aBXSqSANZwekbikpaG4krAw4JHMiMrr+Gm5N+veUGwokGWnYa8kcvQPJbliTvQXMDgFhORuMUV2a3Vlt+xIKTB037p6e1DbkSzD9b13/p5euiQZxepM2ZiDr9rDOav6yxmd2L923TSyns56
3rtfWf//////5SyksWLfP/4vdjd+xfVkhIIAAJRSgBKANGMiM3/+2TEBwANjRVfvJSAEbkjLPWUmXpROqToAIFBuqYWUQNpx++1L9sooCA8obZGBIygiUJyCox3NQUlep5IpNIx22oo20raQEAotC0JRtoZAGjSIWpoI3qUM/z+5+pY23WX6zE2yPgugJjorZfJdcufkSUxfubRBSAJBToND0tey9NNK9nbzOQyhXiPTxk7ZqRPFIQhwKhgVoNEpoFQegglDEiebXzO4gRtBkLkCqNvb+e2ZnNYzoT8pPLKIYUEHEgI/VHikHjH9EFM+fx+/Qv//bJmE2gLMKJnrTXOGWysaMXirnIiIAAAAAAaAInjkDn/+2TEBwCNmLNh7j0pibQgqvW2LemOlJw1gCRoFGBQm0tTd3XknTRyoa0oefR5tTS2xIec4pFpK3YURQqGJJmMdpDN1qLkxFfhvvPl50UYwYNuEm/g3JlWMyizg0BpCyNJKhMvsoOf+rxKTkIZujoP96dXyHAAAAACwTuM/GDlEky9Mh1GN+1rA4AaiUBzlJ9NNmVqu3DhUEUw2rtnYPbvHU+QwMolhVWL5httUrS52vXy//+Jxts3afJhHNAwI5aetjHF4+l6raaynN/9v//7olOHVdteqtfUQ6WVCbOsjpoAAAGgKAzCWYdFlKbrYDD/+2TEB4PM1QdSbSRRwcScKQGnmjkGrIGDq7QWR2S/QOZEiKz1/YvGJhNaXq6aWFQqBqQEjaJ8kUmpdf6opf/////l4lSVUUMhcWPGUJDKrsSiEKjLCA+0ZZ//9/+5AgwDsFZq+tgFBbBQph9K+DpeDLgDelTAqy6pOTLgmAGEpcWOlQw3co6tAp2xzdDIKesWNb01qFAXB5K1rHU+U0HEaPqFZy2ynZaan///l766FJlvJm4K5xVj+RlWGSM2vUedrKoWBta2ln3vX/7N//7u3QARi0W6iF1f/LQjU+oA4ABgmoeWAaQeZ1MIdJnwJqj/+2TECQKNYNtGzSVvybKa6BXMsJs86CBYwRTxLCV/C8KqUPPBPGq2W+V1CTLAifIhPj6jWaz87bNCWLt//r1eIWASUSB1oFSZAKgyHzG0okfEwJjB1EMPxtbf/+md0ecepdZW2e9QxFrRg9Xi/RACwLMMStDmGWkw4gRJ1GGgv43wCoyukq3XiY5ZUAuPDC1prTmGaL03WlWXKntKZb7fXJaPoVvTMzM1sdNFs/Dcnj6PryERC+eHZ0sfYOQ5QB5NR1N1X7M7+/TfpViF+lecdbm/0n/ImQDFmeUAAHTABAGgBrEwas4cN+ShxdOttrL/+2TECoMMPKtFLemBQaebJ8GzMiEKGEJm2luy1UjgH5dvDO572Vp3Yux1YZG65chrdtR6fnoK0mZmc2rT60wIcJJsHyUwMkOjiF1cX4vE5IWFhUP+QHqHoPG/YCh46QGNHrsOxqjTBM29yoxsOYC7w6LhQnLoEAHNjIDDSz7E9AFqbPLlIukKiRECnFEiiSnRpz68CbN17TP3llcFJwTx8Xh6YpVZawsq6I5xYJR+UGjEIl59+ds9tq2PII1r2Jjjmb/j9lLAO5caLDUAAAAAOELAYBgBJbVVMDWLISmpuUXKoS+RKRcsWZhQhK4N6iP/+2TEEgIL/NNJzeUjAXAYqLGjLfHpJ7m+Ff3dpwvfk8zYuZGUkpT//hqXf4haAHkYokarwnL3Nl0RSVSSbl///6x7CyR9FBcw6x84yxaRpJIKpBgEE6j2VgeWCd4NNhEZSVMFRqhgUJwyxeVtZk0Yvn4ammBBF7dIS+QfU+2z+y0ANIf0Ie2566iHNkkEhMTm+eWZWbr1LDZzk4j7tj/+0459Gf//cEDs5LtGNgAAACN6iRspQCDBnBSAIHVEFCQhtx5a0wYCWasiWNo9UdsLFY84DUHYVQZLAL7UTdJDc27FPDjSGb4fK9VLsccfle//+2TEIYAP5Q9b7LDY6YYWLD2HoTj+7
kzNbQ3v78V0gKKUJAJC1K8r723BzuJFCYJCJhRHWuT2SU6iEMEZpkvf/+QeUZqh4wkJEFpTkGKILgcAABABpSTUJzAWVrpQI7aKiGhItOm5oUzAMIiNwDy6MqmpU4okbj1xPCdJJDCDHAQRAksisC5oux4hGlVfr///MMLOcro83vMjgaiIJ2B4OChSzmIBA8NgJwRLdFD1YGAwrQAAYAAANAayO+hwoJBXqAYXpAYjSWHjpCdaO49w9N1LHQ+8Hzam+seSkeRF1JIHUyKFqpYpzNf//xMmVUn/+2TEHoJL1Q1VTL0NAWaV6m2Xjammv6dJacFBagXDDgbCpdL339rNN8NDyvcf/Xe988z7McVYAgJGQt2HJhZdM8zikrjbBkKqTPgfGjuHyM5YsJdtc4UCFEl+cXtM9gKQpznZ0OWGWVwxYIGR8vL/+HlKH0kcObcLchER8SJEP/1UhF+DJQ7volWByUqT40lVAABgAAAwCwKLkY7OSJoR9FApgOixbM2Uh4CSRAbK2W24Al8kwt7x3hrm9TUTjrSFlOXyNyukpZ+msALLbT+iqh2QXZYJgRkWgWFfbYEhOL45dJVQAPirCiTWXfmJYgn/+2TEL4IL4LNTTIk4gXWhKqmVijlDQEwDgJwsBk480RTkLwmQ8JM+h1WoI/l1TrnTUjrSfO8juPxTtE5BmFYBgfiw0J9LUfSOO217qbTf3so+27WZW/t1SxB5LgrgWHSD1L76nR7WWjqxp/xSmtoq/BMqAAG0CQB4CwSjk2ywZFWQuE4MMo5WQN4KpAaOBophWoUnYZr8Gf9uSvF5DABYkgWHtW2VBa1MJMTX9S01T3PEJXFW5geGCgnC4aHHNX/3CrNVUXM1Ks/VDyPIFSgCEIWAFgKAUBKnByYlKkgIRpjhn17Cw1eRm2aojFkpE/X/+2TEPoILxOlXTLENAXaaKimnoaAzXNd1nFHeL/fvKsNQ4nqdHCWnsqlM6mv575+GpVkUg+XYcM3qbFLDsXD4EAqUUL9f+0XtFmEiZp19DBnJzyoAANRYDB9hRuaIEmCA7itp92DPwUWdowFaLgOptGCstrQ2rN4mLeFuDmkZTPSeiguSkI5pytQlcrMz59vP/631fqN1BpV9a/DYLCirAMLMiiLzFVgPCYaYtVAZYJsnxz1kVgCESoKCRfYFlhjEqHo28ZwZgaqUy7FFlpcZjt67O/R7s4W/3+u3O0GFOxyweNk9OW7SxI5S0c1//zv/+2TETgJMDLNNLT0rQXceKXWliuGe07sqFjh+LUNhCVOh6HcFz5WZGLa/+26OCf/6HQI9KX3K1QAAABeTEyMAwChFDQLeohBPdAUZEm2U0pZToiKM3dmeqw6/DNTR8h/h4+ulpRoKDbCBEBxMSSxamVGYZ5d/52ZyCyO6GU5Yog4YADgGNDbzN/mctRx2/3mFCioifHEKXB1ASAg3cUjjPhMEt5xqACQs0csW0cwMtfBxNARMHJfu1RbsrfWTMLfz59PqDSgYNYYKPxsw3/f+zJKIiqpYYJiSYaYDoFpUFg6AZGqjUi39y5qzmO+86iX/+2TEXAIL+PVN7SSvSXIeqHG3nWiHFHKqVQAC4ERn4jvjfQUQUZ0Q4YMGj2AKCBzYjYfpxWX1Ix2L3gbwqxb+01QqoJuLLdDGS5V7ev7///5mjbLrFVSquzQCYnAijaP/EV//8G6rbujj+/3dTLHHlTQ2aJJb/YAAJWrCUALAfskuBeuLDyeIGgAwA2CDSs2sgTRMB/OAUP2RYcLBqU69R2O1UXRVFNJmr6QdqqZ87X19zx//+xjUdZzd8EIRAOlSw/XP1//o3BtZ1rrr/+Y0TizEU712CJF6AAAAzQKgAQCgxgyIG1QMYLWJogCmHQf/+2TEawALpPU/DaFvyYeep7W2LSEULkxAAxolt1XK/8VWVKnhjMWzgKNmVHQpoyYEKKcrNNUlxI2tIUn3NqW7RAboLglEhKYPjj0v+hCa7qrFF
/+jHHOzXRSqLfgDAAg6UoDBEGDBaADOyY1DZBAmZVimCBJr5sTDaC8RfCGleuAzZmdoPXuZGXrKtCQStaju+5ppFa/1tLqLnj//7HM0XeBYeoIwemesJh0WHfFjIbEoVWIxuaUAauAgYSgQabxIbVAoYhngYlguYdmYYViqCgsETwfYligqSne2NZjQmXw5dsIlFBxk1EocYQDfZ9//+2TEeQBMBQM5rglRiWGU5mndoDCxBY3fDd0lSF8229GnlAOilFEXP/b1KttYFL/5ebzg0mK7LN1oQWsgYdkaam+2aPDGYNFiY4KnAFIMcQEegwpb9CQxtCUOgUcUwvvtfgWuIeCuZhKBQ5VITk+Q7iyWt4lldZI7mfFMnyXLOyKJEjfdfQPcEjgxvfif3bJ8siHTtjPcPTzTXTksvTXktjDkDAxMDCAgQMA5t4uwO6SJDjhoxRSZKGcUUvs3L/256kqxzPMDEDCXHUGDKgjoOCCTfqcndSmZ3pspL34yGLwlc/hd9P5Bpznd8y/8pJD/+2TEigILuNUozuBnCWyVpNndjHGPXoxAhKUQMGkIAwSEJjAoBQMUoIY1hw8gAwHYwxA7BJBdASjkFxsNLfL/vdvUtjgZiSd1ji5LkGDEDycd9bDrRmjO072p77p7H/j4Qyb3/pgVhZMLB07y3hkMjGMzGiPv+f6737+/Gj7n19c9Pj4QDAkhzxP1zURQNUQ8ynPzMgfMjrcxkujiAgMDhswSDS2oNEK8VMVaFePBRt6qZZQmwmznHolqRLNDsOw+GpY0wjglahytfmGYmKUUWjcVrIb3y7VI+WbdzojwrLnTlIWbsVdhYbX2rZebFjX/+2TEmwILyM8gDuBnia4hZSXtGHG1ilty87M5Fel+6df+su65HSjrLjcq8IYbgQccD+Z8kacQjaY6uWYsgyYTh8YDFMciGbFuEUgNcChEHAgoCf9Thdk60yhnrWuZXbdM+8Bz8iiEYyIAC4NzmNMKbsdXb9+/9O7fY2yzQfJh3JukJSUu01RONVz4OTby0O+v07cwyLyH3cUlyRzYVTUAACB8BAAAkZ4wsY/ByajG8YHMuYog8VQAbgachKIgaLgGAoLoyNMmk1KjbtXooxDU3cjUAwtckmeJEkMWG2CZKhdFAzZDRMUct2V/J2//l7v/+2TEo4NQWQ0mLvGBmcqgZgXdGPq5y9x9vV4lvmZ/7ZUNQCH490ocLIu7rTkohRgZ6Xn/mrAcANFu8mYdPuy44GDg6ZD0ZioOGXkqYWeJvYxGGgkX1CJQYSIRq4TOIAhI5Eg20SreKenoc3VdtYAHEYK1F+4ZD0SjjSTB9TmEzWO0N+Z//+d80sIw4LB4FAeAKh+mMASAMHYjgOFhOLDfr/8dFEKJyt//71lkMGnpuVDrE3H5yC+wCgGFG8JA0xG7hAjOTyBELxAgAMDCkB/JmVUGALrQ/Y4phBkkhirLXjhbFw9CQ3pKx7qW5wy/aan/+2TElgJPdQM5LrB1SdkiJ8nMIWBUZlr+vmkiRnQ8aKB0GQmDw17tQ4D5xgwUMSnv/qm5YaSvMsta/45toMdR2lEsACwEwDq6jHCTOGCAeB0ph/veNfCsxEAc7wG1g8lBf9MOafKHoZgZsMDQyOLcVt7ZzWYQ2Yik6VlaY0//+rRZIDtAjBwCYP2LnXk61iU8Qxamp+1YdY6GfWv/44/rtSxMfTQjV5afVZMQFAFMQZvWIBuJChJoJmGCwaxkf0rDRNwYPlSwjS4KgidayxsRBY+tJq7zHpSaSwQG5LfbHZmc3/XdhQVUwwJB4MBVRFH/+2TEioMM2M1GbeUFiZ6hKQ2soLFhgcaImCAuPM3QwszDXdDP/9U1Ky+p2FRMYJAqOgB4BqhnpQJJGHCkLdnJoWqT8JAAWHgVqHSqiCV6wbqpvUKrodrz1eyCQ1kOVEUEJBIXZL2//uRRgwRChMhUCMpkUIoYW5Ys0QGr7+BYRSGO+I//4pIWv5rWvmbGj
CjTFQAAKkyAlAf0+VV2FN4kaYaps0J7xAIaGTSJZIBD8rHLAcBIAUMGPbAZT4rvjtLI06J6OZ9bz3/pt//9ZuX9DA4ABbOwabkuwEGQixgYkOhOsGEKZGW//ZyC1RTflsH/+2TEkIIMEQtMbSSwyYch6amhImDIJFABS3gBUCsLgRCJGpxgzxKFG4ClidxQNLgiMBD46jSjQTDoByg4FLqUwilST2vDGu3w+A+zkUFNKyZf9lUggADAIkUBgCGOVCMADUgJTPfaOcPC/8TgEH1YqCh4FUIAaUABYFOC3xCdDKxkiJfcByWZihBMEWHBYwmsLOBGVXIu0kCQE5k/INhSTXupMuHrcYl91YxLmUfv/+Y3klQuKCOJAigKSRLSQaPJcY7nIO+ZlLr/15zi7gPK3uzMHiCScAGbYAJgQ4cMEGFAIObhpbMXiTqJKWjTGYj/+2TEnIIMFQtNTJhRiXQYabWjChimWPzIrBccxUhJpTAFGmCW/s21azEOgyqxmTUyAgCLE/NXuvN2MIf//5BRSQbGSICicUiYLhZqDnR0rAlUOWZccbuvkKeqnuZ/P///3VZjzFPnPf//t1LYxt0AABnKAEgF3HHSjpouOPAgyhGRpGgajMKAVXrEIRpIDCBi0a7X2VQIJMRoguUmdZVL0BgDAFw+9ElWV0LZP//3pxo3HCGoQg5VxLQJw6dFdM5Ov7ZLi04//tJaXKHCBZ4wzrh84NY3gAEgVWpjfQCfkpkyIYa3GO9GRJtdBTFSABT/+2TEqwIMMQlI7SCxmc2iaLW8pKimWDgEUaBCxRjLOUxW4p2gnJoZWeq5NjdaKhhDlDq57uH19//8bGFRcogaKmg/BwhT1GiRLNkQ8GUW9NhmdjjdLJ//eR4mIMDh0YE3+8XQAG3iAoAKAgDRQOIDAENgE0Y+486theosAUKgAQQIPojiq1/AdLYZYRBDAkkks5JeZEFBCjqdvGIlIn/zyPQWMYowVQYJSBHaDVIEYMjDBFDgHCnFXr///6iP+v1qIUizqrr/joyE5QMEcAGwADwf+JsimGEKJOeB7h/MBPId3mEABkCByIkCBsajQqn/+2TErgIMxPNLTRkQkaceaA2lljggfdv1E5j77YhnRMSk9tWg/fMnxbRPfj//pVSLiGKcR1lVJGjghBaHxBRwjHNDf/vFf8/08ffCMo7e//+GhjqaAAAAMIOZEoBccnBwMPIAVGS3AXBwURoQJXF6QqBhUAV4+qIUtl/DtXKN3KIEHXqF1W2HL7n4AxMIc0fvPQPGCOpsM7KuclqRkDsICun/6OU9izJf892TpwRTmZzTA4MDLAaAgLAV3mXREV0MFAYiDTRdE0YMoAJmAIMY4GjKwWBEQ/OrHq5NbfE7PHQVFBdzZ1B8lp8zyjxMTP//+2TEs4IM6Q1G7eUBiZEhqOmWIalFL8nmCOOfZEuJiSAow+A/UaVN//9f+MVpav5j95iTJxCTPGg75gAAAAM4MGEh0AYVnoDAIq0AaBICTDjkdRwEvByy7Cl01TymHKcKIsTyaZ9Rp8FiFRB1OywjCnZdyL/aoYbcAOt2FmWDDBxIKzI/////cE6GSnS8meVY4MCDkGdIlmRgCkAWA4L4PoCMCJZmWAzvGqJmJABwcMXIdSyAiIWg0uhMyfF81//P7pKIkxIhLNwk7zWFd/pP+ZHzRgvCD1MHrMLPchwKhQcxogY2ev////nela//dRX/+2TEuwIMKRFLraRtSX8eqGWmIaH0qLoxFN4iAAAC2LQBAFfDsdAUwkEHBmDCEsM5ASYisHaTlZLBlNPXYdcKdmbJGl/ucVDihAC4oHLnMxgNCyE57v/+TVo5JBrQPHl3xFqtCwOhAJRMwWNQ4f+y8OtlA4dcVMpw6TB2IwGHgGLEg5oHGFaAUVGTQiELaElZMPHhYHyW2drXXThhij8M/WCjTUTomsGo+UYaagq62/C/vzb6KgiHDh8TUVBSGnW+ND4MHg+QeHxEf
f/71yuVqO9xFCshYkHr94mMVQAACtSEBYGLUHTODQQtgyYyIgr/+2TEyAIL1P1J7RhuyXSd6HmkoXHIohBcEXXbg51AYQoQPRipGapRv+vByd9wEnk73ezEgp3Er/ycZm9JWDWi4MBCKT13b5n9B1IgYv3G/////y85kzDHm+qz2932eVYjX8TdviUgGAwcFE5o+yTQCMIEz6THCAzABlByBkjxls0GwsUKHyN/jqqnXxIwwmZ5YZkjaq9cgL1uwyf/g3BRA2CwCCkqMgDCwoJU6LLEmNwwusFQ2SFJicLlUc/////OtnWOn3zqNySu1JPZc65Bk2OVAAABECAFjSfEiHFCZADQIAApmtLFQQXAImoEoAT/+2TE14KL0LFBjOEFQYafqDGmFXiq0+9P3T+dthMjJm+G2W922I4HTC05wnkzPRzcKotHtysZO2M0jmn1XAOgtwNgnhNy4Gcfj1C2xtsrTLd0o5Y23EIcJwEwxNuWyJN6jgbicYiEKWiy/cVkUfpvoP0MrmRdB4CICZhgAGDAQwNgHgDAnOelxbEBM5bW3HmJS6WS2/u5dq00b5Xx7lfzh21hlhVq8z/W8OWsZma5esTVupah2Yja/4itRiwXGio4OCl4l2xjOBoi7zawfa3qIrdn6xRJlaHExvYHpJ5mXnySLDQtOJ47w5Ptz0+iaur/+2TE5IIMjRM/jSTLQa+eZtG8pChTHTJiw02S317cFGkurbv+A4lKZWRIyAAIFAQlKoa8bitNJ4anZxweR6Xcn7sqtY1atcfjsyrrbCOjZspqVNq2UXjpweY4Rzg2VFE6cWTky6ST+iZIo1Gyjdm9zEyK2csTTNbK6na7KSdFjWZmROeJSwwroEiHWXkyiQAcA0y7HqTomiaPxMvMbRcBdIcxNashPsgtUK1zDQ5yqpG0O8ZUsdrPPuXPEAASBp08pnNVTv59bmx8b5kerudaCzlzMvuPdune6w1FHGSZGHRUACA+G2297VAAAmAJOaz/+2TE6YAP5RE3bT0PyiMi57Gns8Cuybp52DZS8OoBafA8AzbS5e1iQWuCVYhZ4GyEjjGBmEChkyRQPw4uaIwMjDI9YjBSZR3YA60QqT/Qs0x6zzQhDHB44MB0MRoP0MPyAoBuOKsa8ZsvxlZHZISsYEmtMkCJg/1chq2e627Ti/HLgnHC7lFcFekEq5wWxQSwouZCish4V2WIANNlzeypliE0BTnYE11lkDqqIasTgzJ12sAYuCIdY4DXCq3skB+9V6FKBkZMVfw41Mv5Ym8TWeMkdjzBj4isCNTMQwz3nVjSTVpRMBELLNnDYhOhfVz/+2TE0wBMcP9H7A2xCXEgZ3zxmhlQ8QmwWC8XqAOsuHzRGmDUROXGVwPplSbRGVQSmmFCUeHAkKCi00lwi00GECsmbHVrzKh2pWjAAAAYrWnewwaWAY802CdiERl9+pGoBkIcUBXe4RZ6VmZIivlroO1Y+FvnRUga62Q4ggtFXNpagVeRjdTEvCGXMbSZXVOSty5hg0581SWZbqsbc5OIu218LrUtOzCJrQAIESIZf+TQwsKnow6tAFJFZfD0/vc0EjmMacQok1+YEI1znxtemLvpCAQ8lOEgIOYmgcimDln2iVgtMjB28PQVRrUqoNv/+2TE4IBRMRM3jA3xCjCiaH2XpjHxyPEsGHoVeFf7SuSD4VVrRTb5KjghQlABIAAAERWyxl5ufBiMZzoxqPBSIYf98sCIhSg2VrSy64tQW50zSzRDJgVSprEECqFlskTEwyRPBImLmGCOzDLUNixq8Ymm5rT8ZX/f3u1ZqZA/RAVlFO9nmT7cWBZZBQwFocCjPGEX4lxzEvN85kEYjOhMRiUKgJ8ZWpxR6kXnwSAVdIrvO07KJCVFObYVJU1CplgLGkJYlITgJG3SWDSFVVR8ft/JU1F08IKKodf13y/wbqKChVypsWFLAgXI0WXvXr3/+2TEwwAL5O1R7AUQCXwfKXmDDfmCT
0hS0crhdOnKsEGGUAdCxCUvPYHmunaDkBIRjIyJMS560K11dQrFq/M09acooRJivXHkyRpEFI5VkiVzsyaRJUbjy3l8OSkiDyoCaCwNHjobEWIlFY31hFlYA8xkhXZfNWa9igqkMbOqbHwv6c7qUJKXyFdNChJDPdR1csyvsSRrnTCut3LmJ0Ik+kzU312L25Sa5ZSECaIw6JdJlZEfZYdTaTjOrpNUKx3xyiRHU+kglF0YJVQT/n00rCiiVYkSSOzudtBWs1TzjyLJUxJLy8oog2olP6u3ueX/+2TE0QCL4Ms/x40wyX+V5vDzJhlpHISmDSSZpF801sRNnDtOojM+6pyplGaqmJd3iiSR2vTJOxI9Fnq2JLeZIkULynbG5qPeMYeFz4WM197esEuU1l9jstysBN5zuse2Ktw6vhAAQDi05WlaFmKi1NVrHc6HBxB93dAuiPPV18EEQRWhKhRfMi/fNNXyzYiXX2MqYIQpSE9z7JYVMyttsCMcifb/lh0hxCgLPPP5E577GspoZ+DlgiOFyEVbveYg64HKtbcYwwlb6hACAAEGS6jt6peXoou6T51EtyoOXS4OvQEMGY1KDE3U2JS0eH3/+2TE3wOMBL0cJ7DRQVEj4QTzDADVRqWXO0S+VNKi0TDMCMIxqR4YyqqWRpJ1DJt1TwakUZma/z46yrFybmRA8vyyVSMyacOFFYjErQtB5wxYuSOvuZShEglw2upAaSAJAjz3szohFQFkrBya1AmXaLnSUJuz09qnTe1R5LwkJJcQpoi2IbqREaueB13/WwDBgDGhlcqDaqhCOs4PNw9ddqa58hMMiFS4hjJtJr16pe7jkXUyxCoTA0JtSmYj7rT3HfGJYObMUNr3zUmHlh3aCBAAAACFywdCMzwbDmYrZDYwWKUsrVtG18kMeDExzDf/+2TE8gGPxTz+B4zCCZStX+BQjABBIGnRV1I8rC7WSphCTgJASaTPbBvnGmy9C6lTcrS4UiZTp55y+eVcKDtyy/yy+WUKk8OyHAzrx2bGAYNOXrDDdzy7K6XIKT6VgQCIhYrc7TWqQaiToiIDJGJkL2uwfKCwjG6NeGNHdjvSIcofVMuGftwERWvXDkvUuuaw0WFSIFRYwiFloBzB7m7j8S1V+RAkjIQWOwQ2abhzRaqTe0aQhkMBsIgU2FAUhk6lqZVT6mtyKnsl3KoAiAj0J+GMIAHrEpmiitJUgg1uREBu7CiFJmdXcoH+aZviIRL/+2TE7YAN7Xr/AwxyAcCwH8hhjBm0ktuiK+QfTcORbFiqVoak02KQQnaMpOQ9Y1BGJflshPIr7QX6uJTuTGQYs8teef04ea1qeopQvGMdqyk6wDJP8mp2s+b1e9oD9ZgBgAkeaoVdMOkEsOaLSwFeI1ZV0jOokhXqZcZ6MhGx3RKpa8imoCEYwZGpNmHXzFUnkJmwR8m0ZiEkzrKXY/CLaCVbv6dFaRSbHcr0lHpcuXDJDOTfEUsJDoQnDKiFj8OPmDG5izbBRx6MRSQICPIYKGCBwhyHIMZHUbY0rkOO2mLuapTMAu6bZGMR1obBLtn/+2TE6wANJXkBIYR+AbWqX+AxDwBT7eg3QzzVUZ43XUz7Hbo29IvCq7SxSBmSC8yUFXkYT9bI6HWjS2oxkWRrmW6cVZEUiwY2JC5rhUPiF/8t81KuyaFSVOHdOn2De47pdaHHQm5IylWI7ni3BCnKX5h8oStJx3YLGMkOsJCyOkWThpm325KCbBFCdhSiLRYzz7cgqApaEEAFavSoWmpaHeGVTpFLvKuO4lQ2lVjJG9YFMq1R4al8dr9+51uytu8+7P5OK+7/ufa7MirYUuqk36zgYC1jBmaqGPmSKudn84WjVTOUclrkaSrZyZZE7fn/+2TE7QBN1XL+oQRgCawt3+AxDZhTFEcNgkFbRaVdjUZ/NR7mwuYNy5o7Zk4lqPNRgpl2bDGrWNSal8qrGDKUON6w81VeLrOk3nYdJgpkiv9my
4t/Tfhfwq6Nc3/8F4dQNymCgnlYKGBgwjysoIO6H8qGyxy9WDA0eWBQwYOjysFBAg6WKGCggTpysDRD//VZUP2ChQTp9DBQYJ5YoYGDR5WChgYNH+WO5f/s1j/qwao/xQQKpZWCggQORYWEeK+sWbi7MVFsWF2YrUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TE7YEOUc7+oYxtyaKln4AxD0FVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVMlbCiUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TE7QPOSWr4AZh1iZ2o2IAxjXhVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TElAPASAIADQAAIAcAQAGAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TEkgPAAAGkAAAAIAAANIAAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TEkgPAAAGkAAAAIAAANIAAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TEkgPAAAGkAAAAIAAANIAAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV
VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TEkgPAAAGkAAAAIAAANIAAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TEkgPAAAGkAAAAIAAANIAAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TEkgPAAAGkAAAAIAAANIAAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVUxBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TEkgPAAAGkAAAAIAAANIAAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+2TEkgPAAAGkAAAAIAAANIAAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVU= + tools: + - functionDeclarations: + - name: structured_data + description: Result of transcribing an audio file + parameters: + type: object + properties: + transcription: + type: string + description: The transcription of the audio + required: + - transcription + 
tool_config: + function_calling_config: + mode: ANY + allowed_function_names: + - structured_data +step2RawChunks: + - candidates: + - content: + role: model + parts: + - functionCall: + name: structured_data + args: + transcription: Hello world + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 48 + candidatesTokenCount: 6 + totalTokenCount: 54 + promptTokensDetails: + - modality: AUDIO + tokenCount: 25 + - modality: TEXT + tokenCount: 23 + candidatesTokensDetails: + - modality: TEXT + tokenCount: 6 +step3KurtEvents: + - chunk: '{"transcription":"Hello world"}' + - finished: true + text: '{"transcription":"Hello world"}' + data: + transcription: Hello world + metadata: + totalInputTokens: 48 + totalOutputTokens: 6 diff --git a/packages/kurt-vertex-ai/src/KurtVertexAI.ts b/packages/kurt-vertex-ai/src/KurtVertexAI.ts index 0d5b2bc..b6b127e 100644 --- a/packages/kurt-vertex-ai/src/KurtVertexAI.ts +++ b/packages/kurt-vertex-ai/src/KurtVertexAI.ts @@ -1,9 +1,14 @@ import "./VertexAI.patch.generateContentStream" // monkey-patches VertexAI GenerativeModel.prototype.generateContentStream - import zodToJsonSchema from "zod-to-json-schema" import { type KurtAdapterV1, - type KurtStreamEvent, + KurtCapabilityError, + type KurtMessage, + type KurtResult, + KurtResultBlockedError, + KurtResultLimitError, + KurtResultValidateError, + type KurtSamplingOptions, type KurtSchema, type KurtSchemaInner, type KurtSchemaInnerMap, @@ -13,13 +18,7 @@ import { type KurtSchemaMaybe, type KurtSchemaResult, type KurtSchemaResultMaybe, - type KurtMessage, - type KurtSamplingOptions, - type KurtResult, - KurtResultValidateError, - KurtResultLimitError, - KurtResultBlockedError, - KurtCapabilityError, + type KurtStreamEvent, } from "@formula-monks/kurt" import type { VertexAI, @@ -27,7 +26,6 @@ import type { VertexAIMessage, VertexAIRequest, VertexAIResponseChunk, - VertexAIResponseChunkCandidate, VertexAIResponseFunctionCall, VertexAISchema, VertexAITool, @@ -171,7 +169,7 @@ 
function toVertexAIMessages(messages: KurtMessage[]): VertexAIMessage[] { const vertexAIMessages: VertexAIMessage[] = [] for (const message of messages) { - const { role, text, toolCall, imageData } = message + const { role, text, toolCall, imageData, audioData } = message if (text) { vertexAIMessages.push({ role, parts: [{ text }] }) } else if (toolCall) { @@ -180,8 +178,8 @@ function toVertexAIMessages(messages: KurtMessage[]): VertexAIMessage[] { const functionResponse = { name, response: result } vertexAIMessages.push({ role, parts: [{ functionCall }] }) vertexAIMessages.push({ role, parts: [{ functionResponse }] }) - } else if (imageData) { - const { mimeType, base64Data } = imageData + } else if (imageData || audioData) { + const { mimeType, base64Data } = imageData ?? audioData const inlineData = { mimeType, data: base64Data } vertexAIMessages.push({ role, parts: [{ inlineData }] }) } else { From 36dacc0935c1d7a412882cac91c4414a3a735ba2 Mon Sep 17 00:00:00 2001 From: Mehdi Chaabouni Date: Sat, 15 Feb 2025 08:19:13 -0500 Subject: [PATCH 3/9] chore: added idea files to .gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bdec31e..ac7d3d9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ node_modules dist .env -.turbo \ No newline at end of file +.turbo +.idea +*.iml From 54fcb2f991eaf378022619ab8cbae1ee060998e0 Mon Sep 17 00:00:00 2001 From: Mehdi Chaabouni Date: Sat, 15 Feb 2025 15:16:04 -0500 Subject: [PATCH 4/9] chore: refactoring renamed `audioData` to `inlineData` --- packages/kurt-cache/src/KurtCache.ts | 4 +- .../spec/generateNaturalLanguage.spec.ts | 27 ++++++- ...s_a_base64-encoded_image_(imageData).yaml} | 28 ++++--- ...s_a_base64-encoded_image_(inlineData).yaml | 73 +++++++++++++++++++ packages/kurt-open-ai/src/KurtOpenAI.ts | 8 +- .../spec/generateNaturalLanguage.spec.ts | 23 +++++- .../spec/generateStructuredData.spec.ts | 11 +-- 
...s_a_base64-encoded_image_(imageData).yaml} | 0 ...s_a_base64-encoded_image_(inlineData).yaml | 63 ++++++++++++++++ ...ta_transcribes_a_base64-encoded_audio.yaml | 12 +-- packages/kurt-vertex-ai/src/KurtVertexAI.ts | 10 +-- packages/kurt/src/Kurt.ts | 57 +++++++-------- 12 files changed, 249 insertions(+), 67 deletions(-) rename packages/kurt-open-ai/spec/snapshots/{KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml => KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image_(imageData).yaml} (81%) create mode 100644 packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image_(inlineData).yaml rename packages/kurt-vertex-ai/spec/snapshots/{KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml => KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image_(imageData).yaml} (100%) create mode 100644 packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image_(inlineData).yaml diff --git a/packages/kurt-cache/src/KurtCache.ts b/packages/kurt-cache/src/KurtCache.ts index 0ec3503..92d7d79 100644 --- a/packages/kurt-cache/src/KurtCache.ts +++ b/packages/kurt-cache/src/KurtCache.ts @@ -319,8 +319,8 @@ function hashMessages(digest: Hash, messages: KurtMessage[]): Hash { mayHash(digest, "text", m.text) mayHash(digest, "imageDataMimeType", m.imageData?.mimeType) mayHash(digest, "imageDataBase64Data", m.imageData?.base64Data) - mayHash(digest, "audioDataMimeType", m.audioData?.mimeType) - mayHash(digest, "audioDataBase64Data", m.audioData?.base64Data) + mayHash(digest, "inlineDataMimeType", m.inlineData?.mimeType) + mayHash(digest, "inlineDataBase64Data", m.inlineData?.base64Data) if (m.toolCall) { mayHash(digest, "toolName", m.toolCall.name) mayHash(digest, "toolArgs", JSON.stringify(m.toolCall.args)) diff --git a/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts 
b/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts index 8a6ab00..9c27ece 100644 --- a/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts +++ b/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts @@ -50,7 +50,7 @@ describe("KurtOpenAI generateNaturalLanguage", () => { ) }) - test("describes a base64-encoded image", async () => { + test("describes a base64-encoded image (imageData)", async () => { const result = await snapshotAndMock("gpt-4o-2024-05-13", (kurt) => kurt.generateNaturalLanguage({ prompt: "Describe this emoji, in two words.", @@ -69,6 +69,25 @@ describe("KurtOpenAI generateNaturalLanguage", () => { expect(result.text).toEqual("Heart eyes") }) + test("describes a base64-encoded image (inlineData)", async () => { + const result = await snapshotAndMock("gpt-4o-2024-05-13", (kurt) => + kurt.generateNaturalLanguage({ + prompt: "Describe this emoji, in two words.", + extraMessages: [ + { + role: "user", + inlineData: { + mimeType: "image/png", + base64Data: + 
"iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=", + }, + }, + ], + }) + ) + expect(result.text).toEqual("Heart eyes") + }) + test("throws an error when a message includes inline audio data", async () => { await snapshotAndMockWithError( "gpt-4o-2024-05-13", @@ -78,7 +97,7 @@ describe("KurtOpenAI generateNaturalLanguage", () => { extraMessages: [ { role: "user", - audioData: { + inlineData: { mimeType: "audio/mpeg", base64Data: "DUMMYDATA", }, @@ -87,7 +106,9 @@ describe("KurtOpenAI generateNaturalLanguage", () => { }), (errorAny) => { expect(errorAny).toBeInstanceOf(Error) - expect(errorAny.message).toEqual("Unsupported audio data for OpenAI") + expect(errorAny.message).toEqual( + "Unsupported image MIME type: audio/mpeg" + ) } ) }) diff --git 
a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image_(imageData).yaml similarity index 81% rename from packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml rename to packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image_(imageData).yaml index 93c043a..69c97ef 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image_(imageData).yaml @@ -23,7 +23,7 @@ step2RawChunks: refusal: null logprobs: null finish_reason: null - system_fingerprint: fp_5796ac6771 + system_fingerprint: fp_279b0a9ade usage: null - choices: - index: 0 @@ -31,7 +31,7 @@ step2RawChunks: content: Heart logprobs: null finish_reason: null - system_fingerprint: fp_5796ac6771 + system_fingerprint: fp_279b0a9ade usage: null - choices: - index: 0 @@ -39,29 +39,35 @@ step2RawChunks: content: " eyes" logprobs: null finish_reason: null - system_fingerprint: fp_5796ac6771 + system_fingerprint: fp_279b0a9ade usage: null - choices: - index: 0 delta: {} logprobs: null finish_reason: stop - system_fingerprint: fp_5796ac6771 + system_fingerprint: fp_279b0a9ade usage: null - choices: [] - system_fingerprint: fp_5796ac6771 + system_fingerprint: fp_279b0a9ade usage: - prompt_tokens: 270 - completion_tokens: 2 - total_tokens: 272 + prompt_tokens: 455 + completion_tokens: 3 + total_tokens: 458 + prompt_tokens_details: + cached_tokens: 0 + audio_tokens: 0 completion_tokens_details: reasoning_tokens: 0 + audio_tokens: 0 + accepted_prediction_tokens: 0 + rejected_prediction_tokens: 0 step3KurtEvents: - chunk: Heart - chunk: " eyes" - finished: true text: Heart eyes metadata: - 
totalInputTokens: 270 - totalOutputTokens: 2 - systemFingerprint: fp_5796ac6771 + totalInputTokens: 455 + totalOutputTokens: 3 + systemFingerprint: fp_279b0a9ade diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image_(inlineData).yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image_(inlineData).yaml new file mode 100644 index 0000000..69c97ef --- /dev/null +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image_(inlineData).yaml @@ -0,0 +1,73 @@ +step1Request: + stream: true + stream_options: + include_usage: true + model: gpt-4o-2024-05-13 + max_tokens: 4096 + temperature: 0.5 + top_p: 0.95 + messages: + - role: user + content: + - type: text + text: Describe this emoji, in two words. + - type: image_url + image_url: + url: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5t
CDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII= +step2RawChunks: + - choices: + - index: 0 + delta: + role: assistant + content: "" + refusal: null + logprobs: null + finish_reason: null + system_fingerprint: fp_279b0a9ade + usage: null + - choices: + - index: 0 + delta: + content: Heart + logprobs: null + finish_reason: null + system_fingerprint: fp_279b0a9ade + usage: null + - choices: + - index: 0 + delta: + content: " eyes" + logprobs: null + finish_reason: null + system_fingerprint: fp_279b0a9ade + usage: null + - choices: + - index: 0 + delta: {} + logprobs: null + finish_reason: stop + system_fingerprint: fp_279b0a9ade + usage: null + - choices: [] + system_fingerprint: fp_279b0a9ade + usage: + prompt_tokens: 455 + completion_tokens: 3 + total_tokens: 458 + prompt_tokens_details: + cached_tokens: 0 + audio_tokens: 0 + completion_tokens_details: + reasoning_tokens: 0 + audio_tokens: 0 + accepted_prediction_tokens: 0 + rejected_prediction_tokens: 0 +step3KurtEvents: + - chunk: Heart + - chunk: " eyes" + - finished: true + text: Heart eyes + metadata: + totalInputTokens: 455 + totalOutputTokens: 3 + systemFingerprint: fp_279b0a9ade diff --git a/packages/kurt-open-ai/src/KurtOpenAI.ts b/packages/kurt-open-ai/src/KurtOpenAI.ts index f5e4a0e..ad256e3 100644 --- a/packages/kurt-open-ai/src/KurtOpenAI.ts +++ b/packages/kurt-open-ai/src/KurtOpenAI.ts @@ -247,9 +247,7 @@ function toOpenAIMessages(messages: KurtMessage[]): OpenAIMessage[] { } for (const [messageIndex, message] of messages.entries()) { - const { text, toolCall, imageData, audioData } = message - if (audioData) throw new Error("Unsupported audio data for OpenAI") // TODO: Use a subclass of KurtError - + const { text, toolCall, imageData, inlineData } = message if (text) { const role = openAIRoleMapping[message.role] @@ -286,8 +284,8 
@@ function toOpenAIMessages(messages: KurtMessage[]): OpenAIMessage[] { tool_call_id: id, content: JSON.stringify(result), }) - } else if (imageData && message.role === "user") { - const { mimeType, base64Data } = imageData + } else if ((imageData || inlineData) && message.role === "user") { + const { mimeType, base64Data } = inlineData ?? imageData // OpenAI only supports the following MIME types, according to these docs: // https://platform.openai.com/docs/guides/vision diff --git a/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts b/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts index 1864734..9702cc2 100644 --- a/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts +++ b/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts @@ -60,7 +60,7 @@ describe("KurtVertexAI generateNaturalLanguage", () => { ) }) - test("describes a base64-encoded image", async () => { + test("describes a base64-encoded image (imageData)", async () => { const result = await snapshotAndMock((kurt) => kurt.generateNaturalLanguage({ prompt: "Describe this emoji, in two words.", @@ -78,4 +78,25 @@ describe("KurtVertexAI generateNaturalLanguage", () => { ) expect(result.text).toEqual("Lovestruck smile \n") }) + + test("describes a base64-encoded image (inlineData)", async () => { + process.env.VERTEX_AI_LOCATION = "us-central1" + process.env.VERTEX_AI_PROJECT = "monksflow-insights-engine" + const result = await snapshotAndMock((kurt) => + kurt.generateNaturalLanguage({ + prompt: "Describe this emoji, in two words.", + extraMessages: [ + { + role: "user", + inlineData: { + mimeType: "image/png", + base64Data: + 
"iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=", + }, + }, + ], + }) + ) + expect(result.text).toEqual("Lovestruck smile \n") + }) }) diff --git a/packages/kurt-vertex-ai/spec/generateStructuredData.spec.ts b/packages/kurt-vertex-ai/spec/generateStructuredData.spec.ts index 3b31b13..b24a7a0 100644 --- a/packages/kurt-vertex-ai/spec/generateStructuredData.spec.ts +++ b/packages/kurt-vertex-ai/spec/generateStructuredData.spec.ts @@ -5,7 +5,7 @@ import { KurtCapabilityError, KurtResultValidateError, } from "@formula-monks/kurt" -import * as fs from "node:fs" +import { promises as fs } from "node:fs" describe("KurtVertexAI generateStructuredData", () => { test("says hello (response format 1)", async () => { @@ -111,17 +111,18 @@ 
describe("KurtVertexAI generateStructuredData", () => { }) test("transcribes a base64-encoded audio", async () => { + const base64Data = await fs.readFile("spec/data/HelloWorld.mp3", { + encoding: "base64", + }) const result = await snapshotAndMock((kurt) => kurt.generateStructuredData({ prompt: "Transcribe this audio file.", extraMessages: [ { role: "user", - audioData: { + inlineData: { mimeType: "audio/mpeg", - base64Data: fs.readFileSync("spec/data/HelloWorld.mp3", { - encoding: "base64", - }), + base64Data, }, }, ], diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image_(imageData).yaml similarity index 100% rename from packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml rename to packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image_(imageData).yaml diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image_(inlineData).yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image_(inlineData).yaml new file mode 100644 index 0000000..6f31543 --- /dev/null +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image_(inlineData).yaml @@ -0,0 +1,63 @@ +step1Request: + generationConfig: + maxOutputTokens: 4096 + temperature: 0.5 + topP: 0.95 + contents: + - role: user + parts: + - text: Describe this emoji, in two words. 
+ - role: user + parts: + - inlineData: + mimeType: image/png + data: iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII= +step2RawChunks: + - candidates: + - content: + role: model + parts: + - text: Loves + index: 0 + usageMetadata: + promptTokenCount: 266 + candidatesTokenCount: 1 + totalTokenCount: 267 + promptTokensDetails: + - modality: TEXT + tokenCount: 8 + - modality: IMAGE + tokenCount: 258 + candidatesTokensDetails: + - modality: TEXT + tokenCount: 1 + - candidates: + - content: + role: model + parts: + - text: | + truck smile + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 266 + candidatesTokenCount: 5 + totalTokenCount: 271 + promptTokensDetails: + - modality: TEXT + tokenCount: 8 + - modality: IMAGE + tokenCount: 258 
+ candidatesTokensDetails: + - modality: TEXT + tokenCount: 5 +step3KurtEvents: + - chunk: Loves + - chunk: | + truck smile + - finished: true + text: | + Lovestruck smile + metadata: + totalInputTokens: 266 + totalOutputTokens: 5 diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_transcribes_a_base64-encoded_audio.yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_transcribes_a_base64-encoded_audio.yaml index fc93dfb..a3e478e 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_transcribes_a_base64-encoded_audio.yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_transcribes_a_base64-encoded_audio.yaml @@ -42,16 +42,16 @@ step2RawChunks: index: 0 usageMetadata: promptTokenCount: 48 - candidatesTokenCount: 6 - totalTokenCount: 54 + candidatesTokenCount: 7 + totalTokenCount: 55 promptTokensDetails: - - modality: AUDIO - tokenCount: 25 - modality: TEXT tokenCount: 23 + - modality: AUDIO + tokenCount: 25 candidatesTokensDetails: - modality: TEXT - tokenCount: 6 + tokenCount: 7 step3KurtEvents: - chunk: '{"transcription":"Hello world"}' - finished: true @@ -60,4 +60,4 @@ step3KurtEvents: transcription: Hello world metadata: totalInputTokens: 48 - totalOutputTokens: 6 + totalOutputTokens: 7 diff --git a/packages/kurt-vertex-ai/src/KurtVertexAI.ts b/packages/kurt-vertex-ai/src/KurtVertexAI.ts index b6b127e..b905f2b 100644 --- a/packages/kurt-vertex-ai/src/KurtVertexAI.ts +++ b/packages/kurt-vertex-ai/src/KurtVertexAI.ts @@ -169,7 +169,7 @@ function toVertexAIMessages(messages: KurtMessage[]): VertexAIMessage[] { const vertexAIMessages: VertexAIMessage[] = [] for (const message of messages) { - const { role, text, toolCall, imageData, audioData } = message + const { role, text, toolCall, imageData, inlineData } = message if (text) { vertexAIMessages.push({ role, parts: [{ text }] }) } else if (toolCall) { @@ -178,10 +178,10 @@ function 
toVertexAIMessages(messages: KurtMessage[]): VertexAIMessage[] { const functionResponse = { name, response: result } vertexAIMessages.push({ role, parts: [{ functionCall }] }) vertexAIMessages.push({ role, parts: [{ functionResponse }] }) - } else if (imageData || audioData) { - const { mimeType, base64Data } = imageData ?? audioData - const inlineData = { mimeType, data: base64Data } - vertexAIMessages.push({ role, parts: [{ inlineData }] }) + } else if (imageData || inlineData) { + const { mimeType, base64Data } = inlineData ?? imageData + const dataPart = { mimeType, data: base64Data } + vertexAIMessages.push({ role, parts: [{ inlineData: dataPart }] }) } else { throw new Error(`Invalid KurtMessage: ${JSON.stringify(message)}`) } diff --git a/packages/kurt/src/Kurt.ts b/packages/kurt/src/Kurt.ts index 5c35da2..f92445b 100644 --- a/packages/kurt/src/Kurt.ts +++ b/packages/kurt/src/Kurt.ts @@ -98,7 +98,7 @@ export class Kurt { * - autonomous, open-ended decision-making or action-taking * * The `data` field of the result will be `undefined` if Kurt decides to - * generate natural language. Otherwise it will contain a tool call. + * generate natural language. Otherwise, it will contain a tool call. * * Your application can decide if and how it should fulfill the tool call. * @@ -198,39 +198,23 @@ export type KurtMessage = { text: string /** + * @deprecated Use `inlineData` instead. * When present, this is an image data message, with a base64-encoded image. * This is often used with "multi-modal" LLMs that support image mode input. * * Not all LLM providers or underlying models support this kind of message. - * Check your LLM provider's documentation for confirmaton. + * Check your LLM provider's documentation for confirmation. */ - imageData: { - /** - * The IANA standard MIME type of the inline image data. - * - * Not all MIME types are supported by all LLM providers. - * "image/png" and "image/jpeg" are the most commonly supported. 
- * Check your LLM provider's documentation for the right list. - */ - mimeType: string - - /** Base64-encoded image data, as a string. */ - base64Data: string - } + imageData: KurtInlineData - audioData: { - /** - * The IANA standard MIME type of the inline audio data. - * - * Not all MIME types are supported by all LLM providers. - * "audio/mpeg" is the most commonly supported. - * Check your LLM provider's documentation for the right list. - */ - mimeType: string - - /** Base64-encoded audio data, as a string. */ - base64Data: string - } + /** + * When present, this is a base64-encoded data message (i.e. image, audio). + * This is often used with "multi-modal" LLMs that support image/audio mode input. + * + * Not all LLM providers or underlying models support this kind of message. + * Check your LLM provider's documentation for confirmation. + */ + inlineData: KurtInlineData /** * When present, this is a tool call message, with structured data input @@ -265,6 +249,21 @@ export type KurtMessage = { } }> +export interface KurtInlineData { + /** + * The IANA standard MIME type of the inline data. + * + * Not all MIME types are supported by all LLM providers. + * OpenAI's GPT, for example, supports only images. + * Gemini supports both images and audio. + * Check your LLM provider's documentation for the right list. + */ + mimeType: string + + /** Base64-encoded data, as a string. */ + base64Data: string +} + export interface KurtCreateOptions { /** * The default system prompt to use, for any generation method call which @@ -308,7 +307,7 @@ export const KurtSamplingOptionsDefault = { /** * Maximum number of output tokens to sample from the model. * - * This is mean to be a cost control measure, to protect against scenarios + * This is meant to be a cost control measure, to protect against scenarios * where the model might get "stuck" and generate excessive output. 
* * When the model hits the output limit, whatever it has generated will From ec290f1624b46d71454f62e1717918d5aea48ca2 Mon Sep 17 00:00:00 2001 From: Mehdi Chaabouni Date: Sun, 16 Feb 2025 16:54:24 -0500 Subject: [PATCH 5/9] chore: clean-up --- packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts b/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts index 9702cc2..a9cdb81 100644 --- a/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts +++ b/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts @@ -80,8 +80,6 @@ describe("KurtVertexAI generateNaturalLanguage", () => { }) test("describes a base64-encoded image (inlineData)", async () => { - process.env.VERTEX_AI_LOCATION = "us-central1" - process.env.VERTEX_AI_PROJECT = "monksflow-insights-engine" const result = await snapshotAndMock((kurt) => kurt.generateNaturalLanguage({ prompt: "Describe this emoji, in two words.", From 5f8aedea6a166f5837fe2761539947dd839b801e Mon Sep 17 00:00:00 2001 From: Mehdi Chaabouni Date: Sun, 16 Feb 2025 17:10:26 -0500 Subject: [PATCH 6/9] chore: upgrade openai library to 4.85.1 --- packages/kurt-open-ai/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/kurt-open-ai/package.json b/packages/kurt-open-ai/package.json index 77066f7..510f8d5 100644 --- a/packages/kurt-open-ai/package.json +++ b/packages/kurt-open-ai/package.json @@ -28,7 +28,7 @@ }, "dependencies": { "@formula-monks/kurt": "^1.4.0", - "openai": "4.76.0", + "openai": "4.85.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.3" }, From deb6076bef4f699ce8a2c9fb87cea11b449f0745 Mon Sep 17 00:00:00 2001 From: Mehdi Chaabouni Date: Sun, 16 Feb 2025 17:11:03 -0500 Subject: [PATCH 7/9] chore: upgrade @google-cloud/vertexai library to 1.9.0 --- packages/kurt-vertex-ai/package.json | 2 +- .../src/VertexAI.patch.generateContentStream.ts | 5 ----- 2 files
changed, 1 insertion(+), 6 deletions(-) diff --git a/packages/kurt-vertex-ai/package.json b/packages/kurt-vertex-ai/package.json index 26d9b76..904f7a3 100644 --- a/packages/kurt-vertex-ai/package.json +++ b/packages/kurt-vertex-ai/package.json @@ -28,7 +28,7 @@ }, "dependencies": { "@formula-monks/kurt": "^1.4.0", - "@google-cloud/vertexai": "1.1.0", + "@google-cloud/vertexai": "1.9.0", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.3" }, diff --git a/packages/kurt-vertex-ai/src/VertexAI.patch.generateContentStream.ts b/packages/kurt-vertex-ai/src/VertexAI.patch.generateContentStream.ts index 1edb637..d2d115b 100644 --- a/packages/kurt-vertex-ai/src/VertexAI.patch.generateContentStream.ts +++ b/packages/kurt-vertex-ai/src/VertexAI.patch.generateContentStream.ts @@ -66,7 +66,6 @@ import { export async function generateContent( location: string, - project: string, publisherModelEndpoint: string, token: Promise, request: GenerateContentRequest | string, @@ -96,7 +95,6 @@ export async function generateContent( } const response: Response | undefined = await postRequest({ region: location, - project: project, resourcePath: publisherModelEndpoint, resourceMethod: constants.GENERATE_CONTENT_METHOD, token: await token, @@ -122,7 +120,6 @@ export async function generateContent( */ export async function generateContentStream( location: string, - project: string, publisherModelEndpoint: string, token: Promise, request: GenerateContentRequest | string, @@ -151,7 +148,6 @@ export async function generateContentStream( } const response = await postRequest({ region: location, - project: project, resourcePath: publisherModelEndpoint, resourceMethod: constants.STREAMING_GENERATE_CONTENT_METHOD, token: await token, @@ -183,7 +179,6 @@ async function generateContentStreamPATCHED( const _this = this as any return generateContentStream( _this.location, - _this.project, _this.publisherModelEndpoint, _this.fetchToken(), request, From 694ed8247762a02e9e83724e9365b91e995e9976 Mon Sep 17 
00:00:00 2001 From: Mehdi Chaabouni Date: Sun, 16 Feb 2025 17:11:24 -0500 Subject: [PATCH 8/9] chore: update pnpm-lock.yaml --- pnpm-lock.yaml | 45 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 02bc197..9af42a3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -156,8 +156,8 @@ importers: specifier: workspace:^ version: link:../kurt openai: - specifier: 4.76.0 - version: 4.76.0(zod@3.23.8) + specifier: 4.85.1 + version: 4.85.1(zod@3.23.8) zod: specifier: ^3.23.8 version: 3.23.8 @@ -199,8 +199,8 @@ importers: specifier: workspace:^ version: link:../kurt '@google-cloud/vertexai': - specifier: 1.1.0 - version: 1.1.0 + specifier: 1.9.0 + version: 1.9.0 zod: specifier: ^3.23.8 version: 3.23.8 @@ -677,6 +677,10 @@ packages: resolution: {integrity: sha512-hfwfdlVpJ+kM6o2b5UFfPnweBcz8tgHAFRswnqUKYqLJsvKU0DDD0Z2/YKoHyAUoPJAv20qg6KlC3msNeUKUiw==} engines: {node: '>=18.0.0'} + '@google-cloud/vertexai@1.9.0': + resolution: {integrity: sha512-8brlcJwFXI4fPuBtsDNQqCdWZmz8gV9jeEKOU0vc5H2SjehCQpXK/NwuSEr916zbhlBHtg/sU37qQQdgvh5BRA==} + engines: {node: '>=18.0.0'} + '@istanbuljs/load-nyc-config@1.1.0': resolution: {integrity: sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==} engines: {node: '>=8'} @@ -2528,6 +2532,18 @@ packages: zod: optional: true + openai@4.85.1: + resolution: {integrity: sha512-jkX2fntHljUvSH3MkWh4jShl10oNkb+SsCj4auKlbu2oF4KWAnmHLNR5EpnUHK1ZNW05Rp0fjbJzYwQzMsH8ZA==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.23.8 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + p-each-series@2.2.0: resolution: {integrity: sha512-ycIL2+1V32th+8scbpTvyHNaHe02z0sjgh91XXjAk+ZeXoPN4Z46DVUnzdso0aX4KckKw0FNNFHdjZ2UsZvxiA==} engines: {node: '>=8'} @@ -3855,6 +3871,13 @@ snapshots: - encoding - supports-color + '@google-cloud/vertexai@1.9.0': + dependencies: + google-auth-library: 9.11.0 + 
transitivePeerDependencies: + - encoding + - supports-color + '@istanbuljs/load-nyc-config@1.1.0': dependencies: camelcase: 5.3.1 @@ -6077,6 +6100,20 @@ snapshots: transitivePeerDependencies: - encoding + openai@4.85.1(zod@3.23.8): + dependencies: + '@types/node': 18.19.32 + '@types/node-fetch': 2.6.11 + abort-controller: 3.0.0 + agentkeepalive: 4.5.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0 + optionalDependencies: + zod: 3.23.8 + transitivePeerDependencies: + - encoding + p-each-series@2.2.0: {} p-each-series@3.0.0: {} From c2aadd95f01cc71822a0c3b1334b5deac1f3308b Mon Sep 17 00:00:00 2001 From: Mehdi Chaabouni Date: Sun, 16 Feb 2025 18:09:33 -0500 Subject: [PATCH 9/9] chore: clean-up --- examples/basic/package.json | 4 +-- packages/kurt-vertex-ai/package.json | 2 +- pnpm-lock.yaml | 52 +++++----------------------- 3 files changed, 12 insertions(+), 46 deletions(-) diff --git a/examples/basic/package.json b/examples/basic/package.json index 65a48ea..05cab3c 100644 --- a/examples/basic/package.json +++ b/examples/basic/package.json @@ -27,8 +27,8 @@ "@formula-monks/kurt-cache": "workspace:*", "@formula-monks/kurt-open-ai": "workspace:*", "@formula-monks/kurt-vertex-ai": "workspace:*", - "@google-cloud/vertexai": "1.1.0", - "openai": "^4.76.0", + "@google-cloud/vertexai": "1.9.3", + "openai": "4.85.1", "zod": "^3.23.8" } } diff --git a/packages/kurt-vertex-ai/package.json b/packages/kurt-vertex-ai/package.json index 904f7a3..3fb3e10 100644 --- a/packages/kurt-vertex-ai/package.json +++ b/packages/kurt-vertex-ai/package.json @@ -28,7 +28,7 @@ }, "dependencies": { "@formula-monks/kurt": "^1.4.0", - "@google-cloud/vertexai": "1.9.0", + "@google-cloud/vertexai": "1.9.3", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.3" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9af42a3..aa95136 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -48,11 +48,11 @@ importers: specifier: workspace:* version: link:../../packages/kurt-vertex-ai 
'@google-cloud/vertexai': - specifier: 1.1.0 - version: 1.1.0 + specifier: 1.9.3 + version: 1.9.3 openai: - specifier: ^4.76.0 - version: 4.76.0(zod@3.23.8) + specifier: 4.85.1 + version: 4.85.1(zod@3.23.8) zod: specifier: ^3.23.8 version: 3.23.8 @@ -199,8 +199,8 @@ importers: specifier: workspace:^ version: link:../kurt '@google-cloud/vertexai': - specifier: 1.9.0 - version: 1.9.0 + specifier: 1.9.3 + version: 1.9.3 zod: specifier: ^3.23.8 version: 3.23.8 @@ -673,12 +673,8 @@ packages: cpu: [x64] os: [win32] - '@google-cloud/vertexai@1.1.0': - resolution: {integrity: sha512-hfwfdlVpJ+kM6o2b5UFfPnweBcz8tgHAFRswnqUKYqLJsvKU0DDD0Z2/YKoHyAUoPJAv20qg6KlC3msNeUKUiw==} - engines: {node: '>=18.0.0'} - - '@google-cloud/vertexai@1.9.0': - resolution: {integrity: sha512-8brlcJwFXI4fPuBtsDNQqCdWZmz8gV9jeEKOU0vc5H2SjehCQpXK/NwuSEr916zbhlBHtg/sU37qQQdgvh5BRA==} + '@google-cloud/vertexai@1.9.3': + resolution: {integrity: sha512-35o5tIEMLW3JeFJOaaMNR2e5sq+6rpnhrF97PuAxeOm0GlqVTESKhkGj7a5B5mmJSSSU3hUfIhcQCRRsw4Ipzg==} engines: {node: '>=18.0.0'} '@istanbuljs/load-nyc-config@1.1.0': @@ -2523,15 +2519,6 @@ packages: resolution: {integrity: sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ==} engines: {node: '>=12'} - openai@4.76.0: - resolution: {integrity: sha512-QBGIetjX1C9xDp5XGa/3mPnfKI9BgAe2xHQX6PmO98wuW9qQaurBaumcYptQWc9LHZZq7cH/Y1Rjnsr6uUDdVw==} - hasBin: true - peerDependencies: - zod: ^3.23.8 - peerDependenciesMeta: - zod: - optional: true - openai@4.85.1: resolution: {integrity: sha512-jkX2fntHljUvSH3MkWh4jShl10oNkb+SsCj4auKlbu2oF4KWAnmHLNR5EpnUHK1ZNW05Rp0fjbJzYwQzMsH8ZA==} hasBin: true @@ -3864,14 +3851,7 @@ snapshots: '@esbuild/win32-x64@0.21.5': optional: true - '@google-cloud/vertexai@1.1.0': - dependencies: - google-auth-library: 9.11.0 - transitivePeerDependencies: - - encoding - - supports-color - - '@google-cloud/vertexai@1.9.0': + '@google-cloud/vertexai@1.9.3': dependencies: google-auth-library: 9.11.0 
transitivePeerDependencies: @@ -6086,20 +6066,6 @@ snapshots: dependencies: mimic-fn: 4.0.0 - openai@4.76.0(zod@3.23.8): - dependencies: - '@types/node': 18.19.32 - '@types/node-fetch': 2.6.11 - abort-controller: 3.0.0 - agentkeepalive: 4.5.0 - form-data-encoder: 1.7.2 - formdata-node: 4.4.1 - node-fetch: 2.7.0 - optionalDependencies: - zod: 3.23.8 - transitivePeerDependencies: - - encoding - openai@4.85.1(zod@3.23.8): dependencies: '@types/node': 18.19.32