mlc-ai · LeshengJin · Aug 24, 2023 · Feb 5, 2024 · Mar 1, 2024
diff --git a/python/mlc_chat/__init__.py b/python/mlc_chat/__init__.py
@@ -2,6 +2,7 @@
 
 MLC Chat is the app runtime of MLC LLM.
 """
-from . import protocol, serve
+
+# from . import protocol, serve
 from .chat_module import ChatConfig, ChatModule, ConvConfig, GenerationConfig
 from .libinfo import __version__
diff --git a/python/mlc_chat/base.py b/python/mlc_chat/base.py
@@ -1,4 +1,5 @@
 """Load MLC LLM library and _ffi_api functions."""
+
 import ctypes
 import os
 import sys
@@ -24,5 +25,5 @@ def _load_mlc_llm_lib():
 
 
 # only load once here
-if SKIP_LOADING_MLCLLM_SO == "0":
-    _LIB, _LIB_PATH = _load_mlc_llm_lib()
+# if SKIP_LOADING_MLCLLM_SO == "0":
+#     _LIB, _LIB_PATH = _load_mlc_llm_lib()
diff --git a/python/mlc_chat/compiler_pass/pipeline.py b/python/mlc_chat/compiler_pass/pipeline.py
@@ -1,4 +1,5 @@
 """The compilation pipeline for LLM applications."""
+
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 

diff --git a/python/mlc_chat/interface/compile.py b/python/mlc_chat/interface/compile.py
@@ -1,4 +1,5 @@
 """Python entrypoint of compilation."""
+
 import dataclasses
 import math
 from io import StringIO

diff --git a/python/mlc_chat/model/model.py b/python/mlc_chat/model/model.py
@@ -17,6 +17,7 @@
 from .phi import phi_loader, phi_model, phi_quantization
 from .qwen import qwen_loader, qwen_model, qwen_quantization
 from .stable_lm import stablelm_loader, stablelm_model, stablelm_quantization
+from .whisper import whisper_loader, whisper_model, whisper_quantization
 
 ModelConfig = Any
 """A ModelConfig is an object that represents a model architecture. It is required to have
@@ -195,4 +196,17 @@ class Model:
             "group-quant": stablelm_quantization.group_quant,
         },
     ),
+    "whisper": Model(
+        name="whisper",
+        model=whisper_model.WhisperForConditionalGeneration,
+        config=whisper_model.WhisperConfig,
+        source={
+            "huggingface-torch": whisper_loader.huggingface,
+            "huggingface-safetensor": whisper_loader.huggingface,
+        },
+        quantize={
+            "no-quant": whisper_quantization.no_quant,
+            "group-quant": whisper_quantization.group_quant,
+        },
+    ),
 }
diff --git a/python/mlc_chat/model/whisper/__init__.py b/python/mlc_chat/model/whisper/__init__.py
diff --git a/python/mlc_chat/model/whisper/whisper_loader.py b/python/mlc_chat/model/whisper/whisper_loader.py
@@ -0,0 +1,51 @@
+"""
+This file specifies how MLC's Whisper parameter maps from other formats, for example HuggingFace
+PyTorch, HuggingFace safetensors.
+"""
+
+import functools
+
+from mlc_chat.loader import ExternMapping
+from mlc_chat.quantization import Quantization
+
+from .whisper_model import WhisperConfig, WhisperForConditionalGeneration
+
+
+def huggingface(model_config: WhisperConfig, quantization: Quantization) -> ExternMapping:
+    """Returns a parameter mapping that maps from the names of MLC LLM parameters to
+    the names of HuggingFace PyTorch parameters.
+
+    Parameters
+    ----------
+    model_config : WhisperConfig
+        The configuration of the Whisper model.
+
+    quantization : Quantization
+        The quantization configuration; if not None, the model is cast to its `model_dtype`.
+
+    Returns
+    -------
+    param_map : ExternMapping
+        The mapping from each MLC parameter to the identically named source parameter.
+    """
+    model = WhisperForConditionalGeneration(model_config)
+    if quantization is not None:
+        model.to(quantization.model_dtype)
+    _, _named_params, _ = model.export_tvm(  # type: ignore[misc]
+        spec=model.get_default_spec(),
+        allow_extern=True,
+    )
+    named_parameters = dict(_named_params)
+
+    mapping = ExternMapping()
+
+    for mlc_name, mlc_param in named_parameters.items():
+        mapping.add_mapping(
+            mlc_name,
+            [mlc_name],  # single source parameter, same name as the MLC one
+            functools.partial(
+                lambda x, dtype: x.astype(dtype),  # cast to the MLC param dtype
+                dtype=mlc_param.dtype,
+            ),
+        )
+    return mapping