From 60905f49b0cd3fd27ed70b50e0e090b190667855 Mon Sep 17 00:00:00 2001
From: Matt Granmoe
Date: Wed, 11 Sep 2024 22:03:09 -0500
Subject: [PATCH 1/6] feat: Add afterCompletion callback to runTools

---
 src/lib/AbstractChatCompletionRunner.ts      |  21 +-
 src/resources/beta/chat/completions.ts       |   7 +-
 tests/lib/ChatCompletionRunFunctions.test.ts | 326 +++++++++++++++++++
 3 files changed, 349 insertions(+), 5 deletions(-)

diff --git a/src/lib/AbstractChatCompletionRunner.ts b/src/lib/AbstractChatCompletionRunner.ts
index 39ee4e993..137972c25 100644
--- a/src/lib/AbstractChatCompletionRunner.ts
+++ b/src/lib/AbstractChatCompletionRunner.ts
@@ -29,6 +29,12 @@ const DEFAULT_MAX_CHAT_COMPLETIONS = 10;
 export interface RunnerOptions extends Core.RequestOptions {
   /** How many requests to make before canceling. Default 10. */
   maxChatCompletions?: number;
+  /** A callback to be run after each chat completion (and after any tools have been run for the completion).
+   * Can be used, for example, to make an LLM call to analyze the conversation thus far and provide guidance
+   * or supplemental information by injecting a message via runner._addMessage().
+   * Receives the chat completion that it was run after as an argument.
+   */
+  afterCompletion?: (completion: ChatCompletion) => Promise<void>;
 }
 
 export class AbstractChatCompletionRunner<
@@ -274,7 +280,7 @@ export class AbstractChatCompletionRunner<
     const role = 'function' as const;
     const { function_call = 'auto', stream, ...restParams } = params;
     const singleFunctionToCall = typeof function_call !== 'string' && function_call?.name;
-    const { maxChatCompletions = DEFAULT_MAX_CHAT_COMPLETIONS } = options || {};
+    const { maxChatCompletions = DEFAULT_MAX_CHAT_COMPLETIONS, afterCompletion } = options || {};
 
     const functionsByName: Record<string, RunnableFunction<any>> = {};
     for (const f of params.functions) {
@@ -345,6 +351,10 @@ export class AbstractChatCompletionRunner<
 
       this._addMessage({ role, name, content });
 
+      if (afterCompletion) {
+        await afterCompletion(chatCompletion);
+      }
+
       if (singleFunctionToCall) return;
     }
   }
@@ -359,7 +369,7 @@ export class AbstractChatCompletionRunner<
     const role = 'tool' as const;
     const { tool_choice = 'auto', stream, ...restParams } = params;
     const singleFunctionToCall = typeof tool_choice !== 'string' && tool_choice?.function?.name;
-    const { maxChatCompletions = DEFAULT_MAX_CHAT_COMPLETIONS } = options || {};
+    const { maxChatCompletions = DEFAULT_MAX_CHAT_COMPLETIONS, afterCompletion } = options || {};
 
     // TODO(someday): clean this logic up
     const inputTools = params.tools.map((tool): RunnableToolFunction<any> => {
@@ -470,9 +480,16 @@ export class AbstractChatCompletionRunner<
         this._addMessage({ role, tool_call_id, content });
 
         if (singleFunctionToCall) {
+          if (afterCompletion) {
+            await afterCompletion(chatCompletion);
+          }
           return;
         }
       }
+
+      if (afterCompletion) {
+        await afterCompletion(chatCompletion);
+      }
     }
 
     return;
diff --git a/src/resources/beta/chat/completions.ts b/src/resources/beta/chat/completions.ts
index 113de4026..03ea0aab5 100644
--- a/src/resources/beta/chat/completions.ts
+++ b/src/resources/beta/chat/completions.ts
@@ -21,6 +21,7 @@ export {
   ParsingFunction,
   ParsingToolFunction,
 } from '../../../lib/RunnableFunction';
+import { RunnerOptions } from '../../../lib/AbstractChatCompletionRunner';
 import { ChatCompletionToolRunnerParams } from '../../../lib/ChatCompletionRunner';
 export { ChatCompletionToolRunnerParams } from '../../../lib/ChatCompletionRunner';
 import { ChatCompletionStreamingToolRunnerParams } from '../../../lib/ChatCompletionStreamingRunner';
@@ -119,19 +120,19 @@ export class Completions extends APIResource {
   runTools<
     Params extends ChatCompletionToolRunnerParams<any>,
     ParsedT = ExtractParsedContentFromParams<Params>,
-  >(body: Params, options?: Core.RequestOptions): ChatCompletionRunner<ParsedT>;
+  >(body: Params, options?: RunnerOptions): ChatCompletionRunner<ParsedT>;
 
   runTools<
     Params extends ChatCompletionStreamingToolRunnerParams<any>,
     ParsedT = ExtractParsedContentFromParams<Params>,
-  >(body: Params, options?: Core.RequestOptions): ChatCompletionStreamingRunner<ParsedT>;
+  >(body: Params, options?: RunnerOptions): ChatCompletionStreamingRunner<ParsedT>;
 
   runTools<
     Params extends ChatCompletionToolRunnerParams<any> | ChatCompletionStreamingToolRunnerParams<any>,
     ParsedT = ExtractParsedContentFromParams<Params>,
   >(
     body: Params,
-    options?: Core.RequestOptions,
+    options?: RunnerOptions,
   ): ChatCompletionRunner<ParsedT> | ChatCompletionStreamingRunner<ParsedT> {
     if (body.stream) {
       return ChatCompletionStreamingRunner.runTools(
diff --git a/tests/lib/ChatCompletionRunFunctions.test.ts b/tests/lib/ChatCompletionRunFunctions.test.ts
index b684f204d..c984c8ca5 100644
--- a/tests/lib/ChatCompletionRunFunctions.test.ts
+++ b/tests/lib/ChatCompletionRunFunctions.test.ts
@@ -1452,6 +1452,169 @@ describe('resource completions', () => {
       ]);
       await listener.sanityCheck();
     });
+    test('afterCompletion', async () => {
+      const { fetch, handleRequest } = mockChatCompletionFetch();
+
+      const openai = new OpenAI({ apiKey: 'something1234', baseURL: 'http://127.0.0.1:4010', fetch });
+
+      let hasInjectedMessage = false;
+      const runner = openai.beta.chat.completions.runTools(
+        {
+          messages: [{ role: 'user', content: 'tell me what the weather is like' }],
+          model: 'gpt-3.5-turbo',
+          tools: [
+            {
+              type: 'function',
+              function: {
+                function: function getWeather() {
+                  return `it's raining`;
+                },
+                parameters: {},
+                description: 'gets the weather',
+              },
+            },
+          ],
+        },
+        {
+          afterCompletion: async () => {
+            // A simple example of conditionally injecting a message into the conversation during a runTools call
+            if (!hasInjectedMessage) {
+              runner._addMessage({
+                role: 'system',
+                content: `Here's some up-to-date information I've found from the web that can help you with your next response: 42.`,
+              });
+
+              hasInjectedMessage = true;
+            }
+          },
+        },
+      );
+
+      const listener = new RunnerListener(runner);
+
+      await handleRequest(async (request) => {
+        expect(request.messages).toEqual([{ role: 'user', content: 'tell me what the weather is like' }]);
+        return {
+          id: '1',
+          choices: [
+            {
+              index: 0,
+              finish_reason: 'function_call',
+              logprobs: null,
+              message: {
+                role: 'assistant',
+                content: null,
+                refusal: null,
+                parsed: null,
+                tool_calls: [
+                  {
+                    type: 'function',
+                    id: '123',
+                    function: {
+                      arguments: '',
+                      name: 'getWeather',
+                    },
+                  },
+                ],
+              },
+            },
+          ],
+          created: Math.floor(Date.now() / 1000),
+          model: 'gpt-3.5-turbo',
+          object: 'chat.completion',
+        };
+      });
+
+      await handleRequest(async (request) => {
+        expect(request.messages).toEqual([
+          {
+            role: 'user',
+            content: 'tell me what the weather is like',
+          },
+          {
+            role: 'assistant',
+            content: null,
+            refusal: null,
+            parsed: null,
+            tool_calls: [
+              {
+                type: 'function',
+                id: '123',
+                function: {
+                  arguments: '',
+                  name: 'getWeather',
+                  parsed_arguments: null,
+                },
+              },
+            ],
+          },
+          {
+            role: 'tool',
+            content: `it's raining`,
+            tool_call_id: '123',
+          },
+          {
+            content: `Here's some up-to-date information I've found from the web that can help you with your next response: 42.`,
+            role: 'system',
+          },
+        ]);
+
+        return {
+          id: '2',
+          choices: [
+            {
+              index: 0,
+              finish_reason: 'stop',
+              logprobs: null,
+              message: {
+                role: 'assistant',
+                content: `it's raining`,
+                refusal: null,
+              },
+            },
+          ],
+          created: Math.floor(Date.now() / 1000),
+          model: 'gpt-3.5-turbo',
+          object: 'chat.completion',
+        };
+      });
+
+      await runner.done();
+
+      expect(listener.messages).toEqual([
+        {
+          role: 'assistant',
+          content: null,
+          parsed: null,
+          refusal: null,
+          tool_calls: [
+            {
+              type: 'function',
+              id: '123',
+              function: {
+                arguments: '',
+                name: 'getWeather',
+                parsed_arguments: null,
+              },
+            },
+          ],
+        },
+        { role: 'tool', content: `it's raining`, tool_call_id: '123' },
+        {
+          content: `Here's some up-to-date information I've found from the web that can help you with your next response: 42.`,
+          role: 'system',
+        },
+        {
+          role: 'assistant',
+          content: "it's raining",
+          parsed: null,
+          refusal: null,
+          tool_calls: [],
+        },
+      ]);
+      expect(listener.functionCallResults).toEqual([`it's raining`]);
+      await listener.sanityCheck();
+    });
   });
 
   describe('runTools with stream: true', () => {
@@ -2310,6 +2473,169 @@ describe('resource completions', () => {
       ]);
       await listener.sanityCheck();
     });
+    test('afterCompletion', async () => {
+      const { fetch, handleRequest } = mockChatCompletionFetch();
+
+      const openai = new OpenAI({ apiKey: 'something1234', baseURL: 'http://127.0.0.1:4010', fetch });
+
+      let hasInjectedMessage = false;
+      const runner = openai.beta.chat.completions.runTools(
+        {
+          messages: [{ role: 'user', content: 'tell me what the weather is like' }],
+          model: 'gpt-3.5-turbo',
+          tools: [
+            {
+              type: 'function',
+              function: {
+                function: function getWeather() {
+                  return `it's raining`;
+                },
+                parameters: {},
+                description: 'gets the weather',
+              },
+            },
+          ],
+        },
+        {
+          afterCompletion: async () => {
+            // A simple example of conditionally injecting a message into the conversation during a runTools call
+            if (!hasInjectedMessage) {
+              runner._addMessage({
+                role: 'system',
+                content: `Here's some up-to-date information I've found from the web that can help you with your next response: 42.`,
+              });
+
+              hasInjectedMessage = true;
+            }
+          },
+        },
+      );
+
+      const listener = new RunnerListener(runner);
+
+      await handleRequest(async (request) => {
+        expect(request.messages).toEqual([{ role: 'user', content: 'tell me what the weather is like' }]);
+        return {
+          id: '1',
+          choices: [
+            {
+              index: 0,
+              finish_reason: 'function_call',
+              logprobs: null,
+              message: {
+                role: 'assistant',
+                content: null,
+                refusal: null,
+                parsed: null,
+                tool_calls: [
+                  {
+                    type: 'function',
+                    id: '123',
+                    function: {
+                      arguments: '',
+                      name: 'getWeather',
+                    },
+                  },
+                ],
+              },
+            },
+          ],
+          created: Math.floor(Date.now() / 1000),
+          model: 'gpt-3.5-turbo',
+          object: 'chat.completion',
+        };
+      });
+
+      await handleRequest(async (request) => {
+        expect(request.messages).toEqual([
+          {
+            role: 'user',
+            content: 'tell me what the weather is like',
+          },
+          {
+            role: 'assistant',
+            content: null,
+            refusal: null,
+            parsed: null,
+            tool_calls: [
+              {
+                type: 'function',
+                id: '123',
+                function: {
+                  arguments: '',
+                  name: 'getWeather',
+                  parsed_arguments: null,
+                },
+              },
+            ],
+          },
+          {
+            role: 'tool',
+            content: `it's raining`,
+            tool_call_id: '123',
+          },
+          {
+            content: `Here's some up-to-date information I've found from the web that can help you with your next response: 42.`,
+            role: 'system',
+          },
+        ]);
+
+        return {
+          id: '2',
+          choices: [
+            {
+              index: 0,
+              finish_reason: 'stop',
+              logprobs: null,
+              message: {
+                role: 'assistant',
+                content: `it's raining`,
+                refusal: null,
+              },
+            },
+          ],
+          created: Math.floor(Date.now() / 1000),
+          model: 'gpt-3.5-turbo',
+          object: 'chat.completion',
+        };
+      });
+
+      await runner.done();
+
+      expect(listener.messages).toEqual([
+        {
+          role: 'assistant',
+          content: null,
+          parsed: null,
+          refusal: null,
+          tool_calls: [
+            {
+              type: 'function',
+              id: '123',
+              function: {
+                arguments: '',
+                name: 'getWeather',
+                parsed_arguments: null,
+              },
+            },
+          ],
+        },
+        { role: 'tool', content: `it's raining`, tool_call_id: '123' },
+        {
+          content: `Here's some up-to-date information I've found from the web that can help you with your next response: 42.`,
+          role: 'system',
+        },
+        {
+          role: 'assistant',
+          content: "it's raining",
+          parsed: null,
+          refusal: null,
+          tool_calls: [],
+        },
+      ]);
+      expect(listener.functionCallResults).toEqual([`it's raining`]);
+      await listener.sanityCheck();
+    });
   });
 
   describe('stream', () => {

From 2d320dbac5cb16b5ed0a36f8896e6a43ef3db0c9 Mon Sep 17 00:00:00 2001
From: Matt Granmoe
Date: Wed, 11 Sep 2024 22:30:06 -0500
Subject: [PATCH 2/6] Add example of afterCompletion to helpers.md

---
 helpers.md | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/helpers.md b/helpers.md
index abf980c82..662e0e3cd 100644
--- a/helpers.md
+++ b/helpers.md
@@ -585,6 +585,42 @@ async function main() {
 main();
 ```
 
+#### Use afterComplete for multi-agent patterns
+
+The `afterComplete` callback allows for some powerful multi-agent patterns. By passing runner.messages to another LLM chat within afterComplete, you can easily have another model analyze the conversation and do things like conditionally inject web research or other targeting guidance to help the first model overcome problems.
+
+```ts
+import OpenAI from 'openai';
+
+const client = new OpenAI();
+
+async function main() {
+  let shouldInjectMessage = false // You can do any kind of conditional logic you want
+  const runner = client.chat.completions
+    .runTools({
+      model: 'gpt-3.5-turbo',
+      messages: [{ role: 'user', content: "How's the weather this week in Los Angeles?" }],
+      tools: [
+        // Whole bunch of tools...perhaps so many that we need to offload some cognitive overhead to another chat via afterCompletion...
+      ],
+    },
+    {
+      afterCompletion: async () => {
+        if (!shouldInjectMessage) {
+          runner._addMessage({
+            role: 'system',
+            content: `Here's some up-to-date information I've found from the web that can help you with your next response: 42.`,
+          });
+
+          shouldInjectMessage = true;
+        }
+      },
+    })
+}
+
+main();
+```
+
 #### Integrate with `zod`
 
 [`zod`](https://www.npmjs.com/package/zod) is a schema validation library which can help with validating the

From 048a4d4408bbcdce495b9650f320a52901fd3e21 Mon Sep 17 00:00:00 2001
From: Matt Granmoe
Date: Fri, 13 Sep 2024 13:19:45 -0500
Subject: [PATCH 3/6] Revert RunnerOptions type fix

---
 src/resources/beta/chat/completions.ts | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/resources/beta/chat/completions.ts b/src/resources/beta/chat/completions.ts
index 03ea0aab5..113de4026 100644
--- a/src/resources/beta/chat/completions.ts
+++ b/src/resources/beta/chat/completions.ts
@@ -21,7 +21,6 @@ export {
   ParsingFunction,
   ParsingToolFunction,
 } from '../../../lib/RunnableFunction';
-import { RunnerOptions } from '../../../lib/AbstractChatCompletionRunner';
 import { ChatCompletionToolRunnerParams } from '../../../lib/ChatCompletionRunner';
 export { ChatCompletionToolRunnerParams } from '../../../lib/ChatCompletionRunner';
 import { ChatCompletionStreamingToolRunnerParams } from '../../../lib/ChatCompletionStreamingRunner';
@@ -120,19 +119,19 @@ export class Completions extends APIResource {
   runTools<
     Params extends ChatCompletionToolRunnerParams<any>,
     ParsedT = ExtractParsedContentFromParams<Params>,
-  >(body: Params, options?: RunnerOptions): ChatCompletionRunner<ParsedT>;
+  >(body: Params, options?: Core.RequestOptions): ChatCompletionRunner<ParsedT>;
 
   runTools<
     Params extends ChatCompletionStreamingToolRunnerParams<any>,
     ParsedT = ExtractParsedContentFromParams<Params>,
-  >(body: Params, options?: RunnerOptions): ChatCompletionStreamingRunner<ParsedT>;
+  >(body: Params, options?: Core.RequestOptions): ChatCompletionStreamingRunner<ParsedT>;
 
   runTools<
     Params extends ChatCompletionToolRunnerParams<any> | ChatCompletionStreamingToolRunnerParams<any>,
     ParsedT = ExtractParsedContentFromParams<Params>,
   >(
     body: Params,
-    options?: RunnerOptions,
+    options?: Core.RequestOptions,
   ): ChatCompletionRunner<ParsedT> | ChatCompletionStreamingRunner<ParsedT> {
     if (body.stream) {
       return ChatCompletionStreamingRunner.runTools(

From 8fceb31305ab420e997164ac8138a211601f5c19 Mon Sep 17 00:00:00 2001
From: Matt Granmoe
Date: Fri, 13 Sep 2024 13:25:23 -0500
Subject: [PATCH 4/6] Update comment in afterCompletion example

---
 helpers.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/helpers.md b/helpers.md
index 662e0e3cd..8514a8423 100644
--- a/helpers.md
+++ b/helpers.md
@@ -585,9 +585,9 @@ async function main() {
 main();
 ```
 
-#### Use afterComplete for multi-agent patterns
+#### Use `afterCompletion` for multi-agent patterns
 
-The `afterComplete` callback allows for some powerful multi-agent patterns. By passing runner.messages to another LLM chat within afterComplete, you can easily have another model analyze the conversation and do things like conditionally inject web research or other targeting guidance to help the first model overcome problems.
+The `afterCompletion` callback allows for some powerful multi-agent patterns. By passing runner.messages to another LLM chat within `afterCompletion`, you can easily have another model analyze the conversation and do things like conditionally inject web research or other relevant data or guidance to help the first model overcome problems.
 
 ```ts
 import OpenAI from 'openai';

From e59654dd43cdbc0d11945731421ef204cc081377 Mon Sep 17 00:00:00 2001
From: Matt Granmoe
Date: Fri, 13 Sep 2024 13:28:53 -0500
Subject: [PATCH 5/6] Update JSDoc for afterCompletion

---
 src/lib/AbstractChatCompletionRunner.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/AbstractChatCompletionRunner.ts b/src/lib/AbstractChatCompletionRunner.ts
index 137972c25..bc423f4aa 100644
--- a/src/lib/AbstractChatCompletionRunner.ts
+++ b/src/lib/AbstractChatCompletionRunner.ts
@@ -32,7 +32,7 @@ export interface RunnerOptions extends Core.RequestOptions {
   /** A callback to be run after each chat completion (and after any tools have been run for the completion).
    * Can be used, for example, to make an LLM call to analyze the conversation thus far and provide guidance
    * or supplemental information by injecting a message via runner._addMessage().
-   * Receives the chat completion that it was run after as an argument.
+   * Receives the chat completion that was just processed as an argument and runs after all tool calls have been handled.
    */
   afterCompletion?: (completion: ChatCompletion) => Promise<void>;
 }

From 15852aa2946728b8c2d15b1cd96ac8ee8020b6af Mon Sep 17 00:00:00 2001
From: Matt Granmoe
Date: Thu, 19 Sep 2024 22:27:25 -0500
Subject: [PATCH 6/6] Update afterCompletion example

---
 helpers.md | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/helpers.md b/helpers.md
index 8514a8423..72f3907b3 100644
--- a/helpers.md
+++ b/helpers.md
@@ -595,24 +595,25 @@ import OpenAI from 'openai';
 const client = new OpenAI();
 
 async function main() {
-  let shouldInjectMessage = false // You can do any kind of conditional logic you want
   const runner = client.chat.completions
     .runTools({
-      model: 'gpt-3.5-turbo',
-      messages: [{ role: 'user', content: "How's the weather this week in Los Angeles?" }],
+      model: 'gpt-4o',
+      // Let's say we have a code agent that can autonomously carry out code changes
+      messages: [systemMessage, { role: 'user', content: "Please set up [some awesome library with a complex setup] in my codebase." }],
       tools: [
-        // Whole bunch of tools...perhaps so many that we need to offload some cognitive overhead to another chat via afterCompletion...
+        // Whole bunch of tools...so many that we need to offload some cognitive overhead via afterCompletion
       ],
     },
     {
       afterCompletion: async () => {
-        if (!shouldInjectMessage) {
+        // Pass the last ten messages to a separate LLM flow and check whether we should inject any web research to help the agent overcome problems or gaps in knowledge
+        const webResearch = await optionallyPerformWebResearch(runner.messages.slice(-10));
+
+        if (webResearch) {
           runner._addMessage({
             role: 'system',
-            content: `Here's some up-to-date information I've found from the web that can help you with your next response: 42.`,
+            content: `You've been provided the following up-to-date web research and should use it to guide your next steps:\n\n${webResearch}`,
           });
-
-          shouldInjectMessage = true;
         }
      }
       },
     })
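
---

For reviewers trying the series out, a minimal end-to-end sketch of the `afterCompletion` option it adds (the `getWeather` tool and the model choice are illustrative; `finalContent()` is existing runner API; note that patch 3/6 reverts the public `RunnerOptions` annotation on `runTools`, so the option is typed only on the internal `RunnerOptions` interface):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const runner = client.beta.chat.completions.runTools(
    {
      model: 'gpt-4o',
      messages: [{ role: 'user', content: 'What is the weather like today?' }],
      tools: [
        {
          type: 'function',
          function: {
            // Illustrative tool; any runnable function works here.
            function: function getWeather() {
              return `it's raining`;
            },
            parameters: {},
            description: 'gets the weather',
          },
        },
      ],
    },
    {
      // Runs once per chat completion, after any tool calls for that
      // completion have been handled (see AbstractChatCompletionRunner).
      afterCompletion: async (completion) => {
        console.log('finished completion', completion.id);
      },
    },
  );

  console.log(await runner.finalContent());
}

main();
```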