feat: Add multi-language system prompts (#576)
---------

Co-authored-by: Charles Marion <chmrion@amazon.com>
michel-heon and charles-marion authored Oct 31, 2024
1 parent 6c64064 commit 6a18d87
Showing 9 changed files with 265 additions and 95 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -458,3 +458,6 @@ lib/user-interface/react-app/src/graphql/subscriptions.ts
# js function
!lib/authentication/lambda/updateUserPoolClient/index.js
!lib/authentication/lambda/updateOidcSecret/index.js
/.project
/.pydevproject
/outputs.json
@@ -60,12 +60,12 @@ def on_llm_end(
"total_tokens": 0,
}
self.usage = {
"input_tokens": self.usage.get("input_tokens")
+ generation.message.usage_metadata.get("input_tokens"),
"output_tokens": self.usage.get("output_tokens")
+ generation.message.usage_metadata.get("output_tokens"),
"total_tokens": self.usage.get("total_tokens")
+ generation.message.usage_metadata.get("total_tokens"),
"input_tokens": self.usage.get("input_tokens", 0)
+ generation.message.usage_metadata.get("input_tokens", 0),
"output_tokens": self.usage.get("output_tokens", 0)
+ generation.message.usage_metadata.get("output_tokens", 0),
"total_tokens": self.usage.get("total_tokens", 0)
+ generation.message.usage_metadata.get("total_tokens", 0),
}
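
The change above adds explicit 0 defaults to every .get() call, so a key that is missing from either the running total or a generation's usage_metadata no longer produces a "None + int" TypeError. A minimal standalone sketch of the same accumulation pattern, using assumed dict shapes rather than the project's callback classes:

# Sketch only: accumulate per-generation token usage into a running total,
# tolerating missing keys on either side. The dict shapes are assumptions
# for illustration, not the repository's actual handler.
from typing import Dict


def accumulate_usage(total: Dict[str, int], usage_metadata: Dict[str, int]) -> Dict[str, int]:
    """Add one generation's token counts to a running total."""
    return {
        key: total.get(key, 0) + usage_metadata.get(key, 0)
        for key in ("input_tokens", "output_tokens", "total_tokens")
    }


if __name__ == "__main__":
    running = {"input_tokens": 12, "output_tokens": 30, "total_tokens": 42}
    partial = {"input_tokens": 5, "output_tokens": 7}  # provider omitted total_tokens
    print(accumulate_usage(running, partial))
    # {'input_tokens': 17, 'output_tokens': 37, 'total_tokens': 42}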


@@ -199,7 +199,7 @@ def run_with_chain_v2(self, user_prompt, workspace_id=None):
input={"input": user_prompt}, config=config
)
if "answer" in response:
answer = response.get("answer") # Rag flow
answer = response.get("answer") # RAG flow
else:
answer = response.content
except Exception as e:
@@ -1,2 +1,2 @@
# flake8: noqa
from .base import *
from adapters.bedrock.base import *
@@ -1,97 +1,128 @@
import os
from typing import Any, List

from ..base import ModelAdapter
from genai_core.registry import registry
import genai_core.clients

from aws_lambda_powertools import Logger

from typing import Any, List
from adapters.base import ModelAdapter
from genai_core.registry import registry
from langchain_core.messages import BaseMessage
from langchain_core.messages.ai import AIMessage
from langchain_core.messages.human import HumanMessage
from langchain_aws import ChatBedrockConverse
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.prompts.prompt import PromptTemplate
from adapters.shared.prompts.system_prompts import (
prompts,
locale,
) # Import prompts and language

logger = Logger()

# Setting programmatic log level
# logger.setLevel("DEBUG")


def get_guardrails() -> dict:
if "BEDROCK_GUARDRAILS_ID" in os.environ:
logger.debug("Guardrails ID found in environment variables.")
return {
"guardrailIdentifier": os.environ["BEDROCK_GUARDRAILS_ID"],
"guardrailVersion": os.environ.get("BEDROCK_GUARDRAILS_VERSION", "DRAFT"),
}
logger.debug("No guardrails ID found.")
return {}


class BedrockChatAdapter(ModelAdapter):
def __init__(self, model_id, *args, **kwargs):
self.model_id = model_id

logger.info(f"Initializing BedrockChatAdapter with model_id: {model_id}")
super().__init__(*args, **kwargs)

def get_qa_prompt(self):
system_prompt = (
"Use the following pieces of context to answer the question at the end."
" If you don't know the answer, just say that you don't know, "
"don't try to make up an answer. \n\n{context}"
# Fetch the QA prompt based on the current language
qa_system_prompt = prompts[locale]["qa_prompt"]
# Append the context placeholder if needed
qa_system_prompt_with_context = qa_system_prompt + "\n\n{context}"
logger.info(
f"Generating QA prompt template with: {qa_system_prompt_with_context}"
)
return ChatPromptTemplate.from_messages(

# Create the ChatPromptTemplate
chat_prompt_template = ChatPromptTemplate.from_messages(
[
("system", system_prompt),
("system", qa_system_prompt_with_context),
MessagesPlaceholder("chat_history"),
("human", "{input}"),
]
)

# Trace the ChatPromptTemplate by logging its content
logger.debug(f"ChatPromptTemplate messages: {chat_prompt_template.messages}")

return chat_prompt_template

def get_prompt(self):
prompt_template = ChatPromptTemplate(
# Fetch the conversation prompt based on the current language
conversation_prompt = prompts[locale]["conversation_prompt"]
logger.info("Generating general conversation prompt template.")
chat_prompt_template = ChatPromptTemplate.from_messages(
[
(
"system",
(
"The following is a friendly conversation between "
"a human and an AI."
"If the AI does not know the answer to a question, it "
"truthfully says it does not know."
),
),
("system", conversation_prompt),
MessagesPlaceholder(variable_name="chat_history"),
("human", "{input}"),
]
)

return prompt_template
# Trace the ChatPromptTemplate by logging its content
logger.debug(f"ChatPromptTemplate messages: {chat_prompt_template.messages}")
return chat_prompt_template

def get_condense_question_prompt(self):
contextualize_q_system_prompt = (
"Given the following conversation and a follow up"
" question, rephrase the follow up question to be a standalone question."
)
return ChatPromptTemplate.from_messages(
# Fetch the prompt based on the current language
condense_question_prompt = prompts[locale]["condense_question_prompt"]
logger.info("Generating condense question prompt template.")
chat_prompt_template = ChatPromptTemplate.from_messages(
[
("system", contextualize_q_system_prompt),
("system", condense_question_prompt),
MessagesPlaceholder("chat_history"),
("human", "{input}"),
]
)
# Trace the ChatPromptTemplate by logging its content
logger.debug(f"ChatPromptTemplate messages: {chat_prompt_template.messages}")
return chat_prompt_template

def get_llm(self, model_kwargs={}, extra={}):
bedrock = genai_core.clients.get_bedrock_client()
params = {}
if "temperature" in model_kwargs:
params["temperature"] = model_kwargs["temperature"]
if "topP" in model_kwargs:
params["top_p"] = model_kwargs["topP"]
if "maxTokens" in model_kwargs:
params["max_tokens"] = model_kwargs["maxTokens"]

# Collect temperature, topP, and maxTokens if available
temperature = model_kwargs.get("temperature")
top_p = model_kwargs.get("topP")
max_tokens = model_kwargs.get("maxTokens")

if temperature is not None:
params["temperature"] = temperature
if top_p:
params["top_p"] = top_p
if max_tokens:
params["max_tokens"] = max_tokens

# Fetch guardrails if any
guardrails = get_guardrails()
if len(guardrails.keys()) > 0:
params["guardrails"] = guardrails

# Log all parameters in a single log entry, including full guardrails
logger.info(
f"Creating LLM chain for model {self.model_id}",
model_kwargs=model_kwargs,
temperature=temperature,
top_p=top_p,
max_tokens=max_tokens,
guardrails=guardrails,
)

# Return ChatBedrockConverse instance with the collected params
return ChatBedrockConverse(
client=bedrock,
model=self.model_id,
@@ -107,47 +138,102 @@ class BedrockChatNoStreamingAdapter(BedrockChatAdapter):
"""Some models do not support system streaming using the converse API"""

def __init__(self, *args, **kwargs):
logger.info(
"Initializing BedrockChatNoStreamingAdapter with disabled streaming."
)
super().__init__(disable_streaming=True, *args, **kwargs)


class BedrockChatNoSystemPromptAdapter(BedrockChatAdapter):
"""Some models do not support system and message history in the conversion API"""
"""Some models do not support system and message history in the conversation API"""

def get_prompt(self):
template = """The following is a friendly conversation between a human and an AI. If the AI does not know the answer to a question, it truthfully says it does not know.
# Fetch the conversation prompt and translated
# words based on the current language
conversation_prompt = prompts[locale]["conversation_prompt"]
question_word = prompts[locale]["question_word"]
assistant_word = prompts[locale]["assistant_word"]
logger.info("Generating no-system-prompt template for conversation.")

# Combine conversation prompt, chat history, and input into the template
template = f"""{conversation_prompt}
{{chat_history}}
Current conversation:
{chat_history}
{question_word}: {{input}}
Question: {input}
{assistant_word}:"""

Assistant:""" # noqa: E501
return PromptTemplateWithHistory(
template=template, input_variables=["input", "chat_history"]
# Create the PromptTemplateWithHistory instance
prompt_template = PromptTemplateWithHistory(
input_variables=["input", "chat_history"], template=template
)

# Log the content of PromptTemplateWithHistory before returning
logger.debug(f"PromptTemplateWithHistory template: {prompt_template.template}")

return prompt_template

def get_condense_question_prompt(self):
template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {input}
Standalone question:""" # noqa: E501
return PromptTemplateWithHistory(
template=template, input_variables=["input", "chat_history"]
# Fetch the prompt and translated words based on the current language
condense_question_prompt = prompts[locale]["condense_question_prompt"]
logger.debug(f"condense_question_prompt: {condense_question_prompt}")

follow_up_input_word = prompts[locale]["follow_up_input_word"]
logger.debug(f"follow_up_input_word: {follow_up_input_word}")

standalone_question_word = prompts[locale]["standalone_question_word"]
logger.debug(f"standalone_question_word: {standalone_question_word}")

chat_history_word = prompts[locale]["chat_history_word"]
logger.debug(f"chat_history_word: {chat_history_word}")

logger.debug("Generating no-system-prompt template for condensing question.")

# Combine the prompt with placeholders
template = f"""{condense_question_prompt}
{chat_history_word}: {{chat_history}}
{follow_up_input_word}: {{input}}
{standalone_question_word}:"""
# Log the content of template
logger.debug(f"get_condense_question_prompt: Template content: {template}")
# Create the PromptTemplateWithHistory instance
prompt_template = PromptTemplateWithHistory(
input_variables=["input", "chat_history"], template=template
)

# Log the content of PromptTemplateWithHistory before returning
logger.debug(f"PromptTemplateWithHistory template: {prompt_template.template}")

return prompt_template

def get_qa_prompt(self):
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
# Fetch the QA prompt and translated words based on the current language
qa_system_prompt = prompts[locale]["qa_prompt"]
question_word = prompts[locale]["question_word"]
helpful_answer_word = prompts[locale]["helpful_answer_word"]
logger.info("Generating no-system-prompt QA template.")

{context}
# Append the context placeholder if needed

Question: {input}
Helpful Answer:""" # noqa: E501
return PromptTemplateWithHistory(
template=template, input_variables=["input", "content"]
# Combine the prompt with placeholders
template = f"""{qa_system_prompt}
{{context}}
{question_word}: {{input}}
{helpful_answer_word}:"""

# Create the PromptTemplateWithHistory instance
prompt_template = PromptTemplateWithHistory(
input_variables=["input", "context"], template=template
)

# Log the content of PromptTemplateWithHistory before returning
logger.debug(f"PromptTemplateWithHistory template: {prompt_template.template}")

return prompt_template


class BedrockChatNoStreamingNoSystemPromptAdapter(BedrockChatNoSystemPromptAdapter):
"""Some models do not support system streaming using the converse API"""
Expand All @@ -164,26 +250,11 @@ def __init__(self, *args, **kwargs):
)
registry.register(r"^bedrock\.cohere\.command-r.*", BedrockChatAdapter)
registry.register(r"^bedrock.anthropic.claude*", BedrockChatAdapter)
registry.register(
r"^bedrock.meta.llama*",
BedrockChatAdapter,
)
registry.register(
r"^bedrock.mistral.mistral-large*",
BedrockChatAdapter,
)
registry.register(
r"^bedrock.mistral.mistral-small*",
BedrockChatAdapter,
)
registry.register(
r"^bedrock.mistral.mistral-7b-*",
BedrockChatNoSystemPromptAdapter,
)
registry.register(
r"^bedrock.mistral.mixtral-*",
BedrockChatNoSystemPromptAdapter,
)
registry.register(r"^bedrock.meta.llama*", BedrockChatAdapter)
registry.register(r"^bedrock.mistral.mistral-large*", BedrockChatAdapter)
registry.register(r"^bedrock.mistral.mistral-small*", BedrockChatAdapter)
registry.register(r"^bedrock.mistral.mistral-7b-*", BedrockChatNoSystemPromptAdapter)
registry.register(r"^bedrock.mistral.mixtral-*", BedrockChatNoSystemPromptAdapter)
registry.register(r"^bedrock.amazon.titan-t*", BedrockChatNoSystemPromptAdapter)


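For context, the reworked get_qa_prompt in the Bedrock adapter above now concatenates the locale-specific QA prompt with a {context} placeholder and wraps it in a ChatPromptTemplate. A minimal usage sketch, where the prompts dict and locale value are assumed stand-ins for adapters.shared.prompts.system_prompts rather than the actual module:

# Sketch of assembling and filling the locale-aware QA template.
# The prompts/locale values below are assumptions; ChatPromptTemplate and
# MessagesPlaceholder are the LangChain classes imported in the adapter.
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage

locale = "en"  # assumed default locale
prompts = {
    "en": {
        "qa_prompt": (
            "Use the following pieces of context to answer the question at the end. "
            "If you don't know the answer, just say that you don't know."
        )
    }
}

qa_system_prompt_with_context = prompts[locale]["qa_prompt"] + "\n\n{context}"
chat_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt_with_context),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

messages = chat_prompt_template.format_messages(
    context="The deployment guide lives in docs/deploy.md.",
    chat_history=[HumanMessage(content="Hi"), AIMessage(content="Hello, how can I help?")],
    input="Where is the deployment guide?",
)
# messages holds one system message, the two history messages, and the human
# question, ready to pass to a chat model such as ChatBedrockConverse.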
@@ -1,6 +1,6 @@
import os
from langchain_openai import ChatOpenAI
from ..base import ModelAdapter
from adapters.base import ModelAdapter
from genai_core.registry import registry


@@ -1,2 +1,3 @@
# flake8: noqa
from .meta.llama2_chat import *
from .prompts.system_prompts import *
@@ -0,0 +1,2 @@
# flake8: noqa
from .system_prompts import *
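
The new adapters/shared/prompts/system_prompts.py referenced throughout this commit is not shown above. Judging from the keys the Bedrock adapter reads (qa_prompt, conversation_prompt, condense_question_prompt, and the translated label words), the module presumably exposes something along these lines; the structure is an inference, the English strings simply echo the inline prompts removed from base.py, and the locale lookup is an assumption:

# Assumed shape of adapters/shared/prompts/system_prompts.py, inferred from the
# keys used in the Bedrock adapter. Not the actual file contents.
import os

# How the locale is chosen is not shown; an environment variable is assumed here.
locale = os.environ.get("PROMPT_LANGUAGE", "en")

prompts = {
    "en": {
        "conversation_prompt": (
            "The following is a friendly conversation between a human and an AI. "
            "If the AI does not know the answer to a question, it truthfully says "
            "it does not know."
        ),
        "qa_prompt": (
            "Use the following pieces of context to answer the question at the end. "
            "If you don't know the answer, just say that you don't know, don't try "
            "to make up an answer."
        ),
        "condense_question_prompt": (
            "Given the following conversation and a follow up question, rephrase "
            "the follow up question to be a standalone question."
        ),
        "question_word": "Question",
        "assistant_word": "Assistant",
        "chat_history_word": "Chat History",
        "follow_up_input_word": "Follow Up Input",
        "standalone_question_word": "Standalone question",
        "helpful_answer_word": "Helpful Answer",
    },
    # Additional locales would repeat the same keys with translated strings.
}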