From eb8cc487d23bae94e85c0915b8e2409305748efa Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Fri, 9 Aug 2024 16:35:44 +0100 Subject: [PATCH] fix(helpers/zod): nested union schema extraction --- .../zod-to-json-schema/zodToJsonSchema.ts | 36 +- tests/lib/__snapshots__/parser.test.ts.snap | 52 ++- tests/lib/parser.test.ts | 346 ++++++++++++++++-- 3 files changed, 378 insertions(+), 56 deletions(-) diff --git a/src/_vendor/zod-to-json-schema/zodToJsonSchema.ts b/src/_vendor/zod-to-json-schema/zodToJsonSchema.ts index 2078b503f..1c3290008 100644 --- a/src/_vendor/zod-to-json-schema/zodToJsonSchema.ts +++ b/src/_vendor/zod-to-json-schema/zodToJsonSchema.ts @@ -43,24 +43,24 @@ const zodToJsonSchema = ( main.title = title; } - const definitions = - !isEmptyObj(refs.definitions) ? - Object.entries(refs.definitions).reduce( - (acc, [name, schema]) => ({ - ...acc, - [name]: - parseDef( - zodDef(schema), - { - ...refs, - currentPath: [...refs.basePath, refs.definitionPath, name], - }, - true, - ) ?? {}, - }), - {}, - ) - : undefined; + const definitions = (() => { + if (isEmptyObj(refs.definitions)) { + return undefined; + } + + const definitions: Record = {}; + + for (const [name, zodSchema] of Object.entries(refs.definitions)) { + definitions[name] = + parseDef( + zodDef(zodSchema), + { ...refs, currentPath: [...refs.basePath, refs.definitionPath, name] }, + true, + ) ?? {}; + } + + return definitions; + })(); const combined: ReturnType> = name === undefined ? diff --git a/tests/lib/__snapshots__/parser.test.ts.snap b/tests/lib/__snapshots__/parser.test.ts.snap index 715c268ff..d98db2345 100644 --- a/tests/lib/__snapshots__/parser.test.ts.snap +++ b/tests/lib/__snapshots__/parser.test.ts.snap @@ -2,16 +2,16 @@ exports[`.parse() zod deserialises response_format 1`] = ` "{ - "id": "chatcmpl-9tZXFjiGKgtrHZeIxvkklWe51DYZp", + "id": "chatcmpl-9uLhvwLPvKOZoJ7hwaa666fYuxYif", "object": "chat.completion", - "created": 1723031665, + "created": 1723216839, "model": "gpt-4o-2024-08-06", "choices": [ { "index": 0, "message": { "role": "assistant", - "content": "{\\"city\\":\\"San Francisco\\",\\"units\\":\\"f\\"}", + "content": "{\\"city\\":\\"San Francisco\\",\\"units\\":\\"c\\"}", "refusal": null }, "logprobs": null, @@ -30,16 +30,16 @@ exports[`.parse() zod deserialises response_format 1`] = ` exports[`.parse() zod merged schemas 2`] = ` "{ - "id": "chatcmpl-9tyPgktyF5JgREIZd0XZI4XgrBAD2", + "id": "chatcmpl-9uLi0HJ6HYH0FM1VI1N6XCREiGvX1", "object": "chat.completion", - "created": 1723127296, + "created": 1723216844, "model": "gpt-4o-2024-08-06", "choices": [ { "index": 0, "message": { "role": "assistant", - "content": "{\\"person1\\":{\\"name\\":\\"Jane Doe\\",\\"phone_number\\":\\"+1234567890\\",\\"roles\\":[\\"other\\"],\\"description\\":\\"Engineer at OpenAI. Email: jane@openai.com\\"},\\"person2\\":{\\"name\\":\\"John Smith\\",\\"phone_number\\":\\"+0987654321\\",\\"differentField\\":\\"Engineer at OpenAI. Email: john@openai.com\\"}}", + "content": "{\\"person1\\":{\\"name\\":\\"Jane Doe\\",\\"phone_number\\":\\".\\",\\"roles\\":[\\"other\\"],\\"description\\":\\"Engineer at OpenAI, born Nov 16, contact email: jane@openai.com\\"},\\"person2\\":{\\"name\\":\\"John Smith\\",\\"phone_number\\":\\"john@openai.com\\",\\"differentField\\":\\"Engineer at OpenAI, born March 1.\\"}}", "refusal": null }, "logprobs": null, @@ -51,23 +51,51 @@ exports[`.parse() zod merged schemas 2`] = ` "completion_tokens": 72, "total_tokens": 133 }, - "system_fingerprint": "fp_845eaabc1f" + "system_fingerprint": "fp_2a322c9ffc" +} +" +`; + +exports[`.parse() zod nested schema extraction 2`] = ` +"{ + "id": "chatcmpl-9uLi6hkH6VcoaYiNEzy3h56QRAyns", + "object": "chat.completion", + "created": 1723216850, + "model": "gpt-4o-2024-08-06", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\\"name\\":\\"TodoApp\\",\\"fields\\":[{\\"type\\":\\"string\\",\\"name\\":\\"taskId\\",\\"metadata\\":{\\"foo\\":\\"unique identifier for each task\\"}},{\\"type\\":\\"string\\",\\"name\\":\\"title\\",\\"metadata\\":{\\"foo\\":\\"title of the task\\"}},{\\"type\\":\\"string\\",\\"name\\":\\"description\\",\\"metadata\\":{\\"foo\\":\\"detailed description of the task. This is optional.\\"}},{\\"type\\":\\"string\\",\\"name\\":\\"status\\",\\"metadata\\":{\\"foo\\":\\"status of the task, e.g., pending, completed, etc.\\"}},{\\"type\\":\\"string\\",\\"name\\":\\"dueDate\\",\\"metadata\\":null},{\\"type\\":\\"string\\",\\"name\\":\\"priority\\",\\"metadata\\":{\\"foo\\":\\"priority level of the task, e.g., low, medium, high\\"}},{\\"type\\":\\"string\\",\\"name\\":\\"creationDate\\",\\"metadata\\":{\\"foo\\":\\"date when the task was created\\"}},{\\"type\\":\\"string\\",\\"name\\":\\"lastModifiedDate\\",\\"metadata\\":{\\"foo\\":\\"date when the task was last modified\\"}},{\\"type\\":\\"string\\",\\"name\\":\\"tags\\",\\"metadata\\":{\\"foo\\":\\"tags associated with the task, for categorization\\"}}]}", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 36, + "completion_tokens": 208, + "total_tokens": 244 + }, + "system_fingerprint": "fp_2a322c9ffc" } " `; exports[`.parse() zod top-level recursive schemas 1`] = ` "{ - "id": "chatcmpl-9taiMDrRVRIkk1Xg1yE82UjnYuZjt", + "id": "chatcmpl-9uLhw79ArBF4KsQQOlsoE68m6vh6v", "object": "chat.completion", - "created": 1723036198, + "created": 1723216840, "model": "gpt-4o-2024-08-06", "choices": [ { "index": 0, "message": { "role": "assistant", - "content": "{\\"type\\":\\"form\\",\\"label\\":\\"User Profile Form\\",\\"children\\":[{\\"type\\":\\"field\\",\\"label\\":\\"Full Name\\",\\"children\\":[],\\"attributes\\":[{\\"name\\":\\"type\\",\\"value\\":\\"text\\"},{\\"name\\":\\"placeholder\\",\\"value\\":\\"Enter your full name\\"}]},{\\"type\\":\\"field\\",\\"label\\":\\"Email Address\\",\\"children\\":[],\\"attributes\\":[{\\"name\\":\\"type\\",\\"value\\":\\"email\\"},{\\"name\\":\\"placeholder\\",\\"value\\":\\"Enter your email address\\"}]},{\\"type\\":\\"field\\",\\"label\\":\\"Phone Number\\",\\"children\\":[],\\"attributes\\":[{\\"name\\":\\"type\\",\\"value\\":\\"tel\\"},{\\"name\\":\\"placeholder\\",\\"value\\":\\"Enter your phone number\\"}]},{\\"type\\":\\"button\\",\\"label\\":\\"Submit\\",\\"children\\":[],\\"attributes\\":[{\\"name\\":\\"type\\",\\"value\\":\\"submit\\"}]}],\\"attributes\\":[{\\"name\\":\\"method\\",\\"value\\":\\"post\\"},{\\"name\\":\\"action\\",\\"value\\":\\"/submit-profile\\"}]}", + "content": "{\\"type\\":\\"form\\",\\"label\\":\\"User Profile Form\\",\\"children\\":[{\\"type\\":\\"field\\",\\"label\\":\\"First Name\\",\\"children\\":[],\\"attributes\\":[{\\"name\\":\\"type\\",\\"value\\":\\"text\\"},{\\"name\\":\\"name\\",\\"value\\":\\"firstName\\"},{\\"name\\":\\"placeholder\\",\\"value\\":\\"Enter your first name\\"}]},{\\"type\\":\\"field\\",\\"label\\":\\"Last Name\\",\\"children\\":[],\\"attributes\\":[{\\"name\\":\\"type\\",\\"value\\":\\"text\\"},{\\"name\\":\\"name\\",\\"value\\":\\"lastName\\"},{\\"name\\":\\"placeholder\\",\\"value\\":\\"Enter your last name\\"}]},{\\"type\\":\\"field\\",\\"label\\":\\"Email Address\\",\\"children\\":[],\\"attributes\\":[{\\"name\\":\\"type\\",\\"value\\":\\"email\\"},{\\"name\\":\\"name\\",\\"value\\":\\"email\\"},{\\"name\\":\\"placeholder\\",\\"value\\":\\"Enter your email address\\"}]},{\\"type\\":\\"button\\",\\"label\\":\\"Submit\\",\\"children\\":[],\\"attributes\\":[{\\"name\\":\\"type\\",\\"value\\":\\"submit\\"}]}],\\"attributes\\":[]}", "refusal": null }, "logprobs": null, @@ -76,8 +104,8 @@ exports[`.parse() zod top-level recursive schemas 1`] = ` ], "usage": { "prompt_tokens": 38, - "completion_tokens": 168, - "total_tokens": 206 + "completion_tokens": 175, + "total_tokens": 213 }, "system_fingerprint": "fp_845eaabc1f" } diff --git a/tests/lib/parser.test.ts b/tests/lib/parser.test.ts index 3fb3c948a..331b16895 100644 --- a/tests/lib/parser.test.ts +++ b/tests/lib/parser.test.ts @@ -32,10 +32,10 @@ describe('.parse()', () => { "index": 0, "logprobs": null, "message": { - "content": "{"city":"San Francisco","units":"f"}", + "content": "{"city":"San Francisco","units":"c"}", "parsed": { "city": "San Francisco", - "units": "f", + "units": "c", }, "refusal": null, "role": "assistant", @@ -76,18 +76,9 @@ describe('.parse()', () => { expect(completion.choices[0]?.message).toMatchInlineSnapshot(` { - "content": "{"type":"form","label":"User Profile Form","children":[{"type":"field","label":"Full Name","children":[],"attributes":[{"name":"type","value":"text"},{"name":"placeholder","value":"Enter your full name"}]},{"type":"field","label":"Email Address","children":[],"attributes":[{"name":"type","value":"email"},{"name":"placeholder","value":"Enter your email address"}]},{"type":"field","label":"Phone Number","children":[],"attributes":[{"name":"type","value":"tel"},{"name":"placeholder","value":"Enter your phone number"}]},{"type":"button","label":"Submit","children":[],"attributes":[{"name":"type","value":"submit"}]}],"attributes":[{"name":"method","value":"post"},{"name":"action","value":"/submit-profile"}]}", + "content": "{"type":"form","label":"User Profile Form","children":[{"type":"field","label":"First Name","children":[],"attributes":[{"name":"type","value":"text"},{"name":"name","value":"firstName"},{"name":"placeholder","value":"Enter your first name"}]},{"type":"field","label":"Last Name","children":[],"attributes":[{"name":"type","value":"text"},{"name":"name","value":"lastName"},{"name":"placeholder","value":"Enter your last name"}]},{"type":"field","label":"Email Address","children":[],"attributes":[{"name":"type","value":"email"},{"name":"name","value":"email"},{"name":"placeholder","value":"Enter your email address"}]},{"type":"button","label":"Submit","children":[],"attributes":[{"name":"type","value":"submit"}]}],"attributes":[]}", "parsed": { - "attributes": [ - { - "name": "method", - "value": "post", - }, - { - "name": "action", - "value": "/submit-profile", - }, - ], + "attributes": [], "children": [ { "attributes": [ @@ -95,43 +86,55 @@ describe('.parse()', () => { "name": "type", "value": "text", }, + { + "name": "name", + "value": "firstName", + }, { "name": "placeholder", - "value": "Enter your full name", + "value": "Enter your first name", }, ], "children": [], - "label": "Full Name", + "label": "First Name", "type": "field", }, { "attributes": [ { "name": "type", - "value": "email", + "value": "text", + }, + { + "name": "name", + "value": "lastName", }, { "name": "placeholder", - "value": "Enter your email address", + "value": "Enter your last name", }, ], "children": [], - "label": "Email Address", + "label": "Last Name", "type": "field", }, { "attributes": [ { "name": "type", - "value": "tel", + "value": "email", + }, + { + "name": "name", + "value": "email", }, { "name": "placeholder", - "value": "Enter your phone number", + "value": "Enter your email address", }, ], "children": [], - "label": "Phone Number", + "label": "Email Address", "type": "field", }, { @@ -467,22 +470,313 @@ describe('.parse()', () => { expect(completion.choices[0]?.message).toMatchInlineSnapshot(` { - "content": "{"person1":{"name":"Jane Doe","phone_number":"+1234567890","roles":["other"],"description":"Engineer at OpenAI. Email: jane@openai.com"},"person2":{"name":"John Smith","phone_number":"+0987654321","differentField":"Engineer at OpenAI. Email: john@openai.com"}}", + "content": "{"person1":{"name":"Jane Doe","phone_number":".","roles":["other"],"description":"Engineer at OpenAI, born Nov 16, contact email: jane@openai.com"},"person2":{"name":"John Smith","phone_number":"john@openai.com","differentField":"Engineer at OpenAI, born March 1."}}", "parsed": { "person1": { - "description": "Engineer at OpenAI. Email: jane@openai.com", + "description": "Engineer at OpenAI, born Nov 16, contact email: jane@openai.com", "name": "Jane Doe", - "phone_number": "+1234567890", + "phone_number": ".", "roles": [ "other", ], }, "person2": { - "differentField": "Engineer at OpenAI. Email: john@openai.com", + "differentField": "Engineer at OpenAI, born March 1.", "name": "John Smith", - "phone_number": "+0987654321", + "phone_number": "john@openai.com", + }, + }, + "refusal": null, + "role": "assistant", + "tool_calls": [], + } + `); + }); + + test('nested schema extraction', async () => { + // optional object that can be on each field, mark it as nullable to comply with structured output restrictions + const metadata = z.nullable( + z.object({ + foo: z.string(), + }), + ); + + // union element a + const fieldA = z.object({ + type: z.literal('string'), + name: z.string(), + metadata, + }); + + // union element b, both referring to above nullable object + const fieldB = z.object({ + type: z.literal('number'), + metadata, + }); + + // top level input object with array of union element + const model = z.object({ + name: z.string(), + fields: z.array(z.union([fieldA, fieldB])), + }); + + expect(zodResponseFormat(model, 'query').json_schema.schema).toMatchInlineSnapshot(` + { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "definitions": { + "contactPerson_properties_person1_properties_name": { + "type": "string", + }, + "contactPerson_properties_person1_properties_phone_number": { + "nullable": true, + "type": "string", + }, + "query": { + "additionalProperties": false, + "properties": { + "fields": { + "items": { + "anyOf": [ + { + "additionalProperties": false, + "properties": { + "metadata": { + "anyOf": [ + { + "additionalProperties": false, + "properties": { + "foo": { + "type": "string", + }, + }, + "required": [ + "foo", + ], + "type": "object", + }, + { + "type": "null", + }, + ], + }, + "name": { + "type": "string", + }, + "type": { + "const": "string", + "type": "string", + }, + }, + "required": [ + "type", + "name", + "metadata", + ], + "type": "object", + }, + { + "additionalProperties": false, + "properties": { + "metadata": { + "$ref": "#/definitions/query_properties_fields_items_anyOf_0_properties_metadata", + }, + "type": { + "const": "number", + "type": "string", + }, + }, + "required": [ + "type", + "metadata", + ], + "type": "object", + }, + ], + }, + "type": "array", + }, + "name": { + "type": "string", + }, + }, + "required": [ + "name", + "fields", + ], + "type": "object", + }, + "query_properties_fields_items_anyOf_0_properties_metadata": { + "anyOf": [ + { + "$ref": "#/definitions/query_properties_fields_items_anyOf_0_properties_metadata_anyOf_0", + }, + { + "type": "null", + }, + ], + }, + }, + "properties": { + "fields": { + "items": { + "anyOf": [ + { + "additionalProperties": false, + "properties": { + "metadata": { + "anyOf": [ + { + "additionalProperties": false, + "properties": { + "foo": { + "type": "string", + }, + }, + "required": [ + "foo", + ], + "type": "object", + }, + { + "type": "null", + }, + ], + }, + "name": { + "type": "string", + }, + "type": { + "const": "string", + "type": "string", + }, + }, + "required": [ + "type", + "name", + "metadata", + ], + "type": "object", + }, + { + "additionalProperties": false, + "properties": { + "metadata": { + "$ref": "#/definitions/query_properties_fields_items_anyOf_0_properties_metadata", + }, + "type": { + "const": "number", + "type": "string", + }, + }, + "required": [ + "type", + "metadata", + ], + "type": "object", + }, + ], + }, + "type": "array", + }, + "name": { + "type": "string", }, }, + "required": [ + "name", + "fields", + ], + "type": "object", + } + `); + + const completion = await makeSnapshotRequest( + (openai) => + openai.beta.chat.completions.parse({ + model: 'gpt-4o-2024-08-06', + messages: [ + { + role: 'system', + content: + "You are a helpful assistant. Generate a data model according to the user's instructions.", + }, + { role: 'user', content: 'create a todo app data model' }, + ], + response_format: zodResponseFormat(model, 'query'), + }), + 2, + ); + + expect(completion.choices[0]?.message).toMatchInlineSnapshot(` + { + "content": "{"name":"TodoApp","fields":[{"type":"string","name":"taskId","metadata":{"foo":"unique identifier for each task"}},{"type":"string","name":"title","metadata":{"foo":"title of the task"}},{"type":"string","name":"description","metadata":{"foo":"detailed description of the task. This is optional."}},{"type":"string","name":"status","metadata":{"foo":"status of the task, e.g., pending, completed, etc."}},{"type":"string","name":"dueDate","metadata":null},{"type":"string","name":"priority","metadata":{"foo":"priority level of the task, e.g., low, medium, high"}},{"type":"string","name":"creationDate","metadata":{"foo":"date when the task was created"}},{"type":"string","name":"lastModifiedDate","metadata":{"foo":"date when the task was last modified"}},{"type":"string","name":"tags","metadata":{"foo":"tags associated with the task, for categorization"}}]}", + "parsed": { + "fields": [ + { + "metadata": { + "foo": "unique identifier for each task", + }, + "name": "taskId", + "type": "string", + }, + { + "metadata": { + "foo": "title of the task", + }, + "name": "title", + "type": "string", + }, + { + "metadata": { + "foo": "detailed description of the task. This is optional.", + }, + "name": "description", + "type": "string", + }, + { + "metadata": { + "foo": "status of the task, e.g., pending, completed, etc.", + }, + "name": "status", + "type": "string", + }, + { + "metadata": null, + "name": "dueDate", + "type": "string", + }, + { + "metadata": { + "foo": "priority level of the task, e.g., low, medium, high", + }, + "name": "priority", + "type": "string", + }, + { + "metadata": { + "foo": "date when the task was created", + }, + "name": "creationDate", + "type": "string", + }, + { + "metadata": { + "foo": "date when the task was last modified", + }, + "name": "lastModifiedDate", + "type": "string", + }, + { + "metadata": { + "foo": "tags associated with the task, for categorization", + }, + "name": "tags", + "type": "string", + }, + ], + "name": "TodoApp", + }, "refusal": null, "role": "assistant", "tool_calls": [],