Update VILA and UI (#97)
RaymondWang0 authored Mar 3, 2024
1 parent d0fed69 · commit 6fb8866
Showing 22 changed files with 1,096 additions and 71 deletions.
README.md: 12 changes (8 additions, 4 deletions)
````diff
@@ -151,15 +151,15 @@ TinyChatEngine offers versatile capabilities suitable for various applications.
 
 - Start the speech-to-speech chat locally.
   ```bash
-  ./chat -v # chat.exe -v on Windows
+  ./voicechat # voicechat.exe on Windows
   ```
 
 - If you encounter any issues or errors during setup, please explore [here](llm/application/README.md) to follow the step-by-step guide to debug.
 
 
 ## Deploy vision language model (VLM) chatbot with TinyChatEngine
 
-TinyChatEngine supports not only LLM but also VLM. We introduce a sophisticated text/voice chatbot for VLM. Here, we provide very easy-to-follow instructions to deploy vision language model chatbot (VILA-7B) with TinyChatEngine.
+TinyChatEngine supports not only LLM but also VLM. We introduce a sophisticated text/voice chatbot for VLM. Here, we provide easy-to-follow instructions to deploy vision language model chatbot (VILA-7B) with TinyChatEngine. We recommend using M1/M2 MacBooks for this VLM feature.
 
 - Follow the instructions above to setup the basic environment, i.e., [Prerequisites](#prerequisites) and [Step-by-step to Deploy LLaMA2-7B-chat with TinyChatEngine](#step-by-step-to-deploy-llama2-7b-chat-with-tinychatengine).
 
@@ -169,6 +169,10 @@ TinyChatEngine supports not only LLM but also VLM. We introduce a sophisticated
 - (For other OS) Please refer to [here](https://github.com/AnonymouX47/termvisage?tab=readme-ov-file#requirements) to get the appropriate terminal ready.
 
 - (Optional) To enable the speech-to-speech chatbot for VLM, please follow the [instruction above](#deploy-speech-to-speech-chatbot-with-tinychatengine-demo) to run the shell script to set up the environment.
+  ```bash
+  cd llm
+  ./voicechat_setup.sh
+  ```
 
 - Download the quantized VILA-7B model from our model zoo.
 
@@ -184,12 +188,12 @@ TinyChatEngine supports not only LLM but also VLM. We introduce a sophisticated
 - (For MacOS) Start the chatbot locally. Please use an appropriate terminal (e.g., iTerm2).
   - Image/Text to text
     ```bash
-    ./scripts/vila.sh ../assets/figures/vlm_demo/pedestrian.png
+    ./vila ../assets/figures/vlm_demo/pedestrian.png
     ```
 
   - Image/Speech to speech
     ```bash
-    ./scripts/voice_vila.sh ../assets/figures/vlm_demo/pedestrian.png
+    ./voice_vila ../assets/figures/vlm_demo/pedestrian.png
     ```
 
 - There are several images under the path `../assets/figures/vlm_demo`. Feel free to try different images with VILA on your device!
````
kernels/matmul.h: 5 changes (4 additions, 1 deletion)
```diff
@@ -99,15 +99,16 @@ struct thread_args {
     int start_i, end_i, blk_size;
 };
 
-
 #define MAX(A, B) ((A) > (B) ? (A) : (B))
 #define MIN(A, B) ((A) < (B) ? (A) : (B))
 
 namespace matmul {
 class MatmulOperator {
    public:
     void mat_mul_transposed(const struct matmul_params *params);
     void mat_mul_accelerator_transposed_fastover_column(const struct matmul_params *params);
+    void mat_mul_accelerator_transposed_fastover_column_bias(const struct matmul_params *params);
+    void mat_mul_accelerator_untransposed_fastover_column(const struct matmul_params *params);
     // int8
     void naive_mat_mul_int8(const struct matmul_params *params);
     void mat_mul_accelerator_int8_fast_32unroll_over_column(const struct matmul_params *params);
@@ -125,6 +126,8 @@
     void mat_mul_accelerator_int8_int4_fast_no_offset(struct matmul_params *params);
     void gemv_accelerator_int8_int4_fast_no_offset(struct matmul_params *params);
     void gemm_accelerator_int8_int4_fast_no_offset(struct matmul_params *params);
+    void gemm_accelerator_int8_int4_fast_no_offset_v2(struct matmul_params *params);
+    void cblas_gemm_accelerator_no_offset(struct matmul_params *params);
     void naive_mat_mul_int4(const struct matmul_params *params);
     void naive_mat_mul_int4_with_offset(const struct matmul_params *params);
     // cuda
```
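For orientation, here is a hypothetical usage sketch of the new `mat_mul_accelerator_untransposed_fastover_column` entry point declared above. It relies only on the `matmul_params`/`matrix` fields visible in this commit's diffs (`data_ptr`, `row`, `column`); the include path and zero-initialization of the structs' remaining fields are assumptions, not something the commit specifies.

```cpp
// Hypothetical sketch: calling the new untransposed fp32 kernel.
// Only the fields visible in this diff (data_ptr, row, column) are used;
// the real structs may have more members.
#include <vector>
#include "matmul.h"  // assumed include path

int main() {
    const int m = 4, k = 8, n = 16;
    std::vector<float> A(m * k, 1.0f), B(k * n, 1.0f), C(m * n, 0.0f);

    struct matmul_params params = {};
    params.A.row = m; params.A.column = k; params.A.data_ptr = A.data();
    params.B.row = k; params.B.column = n; params.B.data_ptr = B.data();  // B stored untransposed (k x n)
    params.C.row = m; params.C.column = n; params.C.data_ptr = C.data();

    matmul::MatmulOperator op;
    op.mat_mul_accelerator_untransposed_fastover_column(&params);  // C = A * B
    // With all-ones inputs, every element of C should equal k (= 8.0f).
    return 0;
}
```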
kernels/neon/matmul_neon_fp32.cc: 39 changes (30 additions, 9 deletions)
```diff
@@ -38,25 +38,46 @@ void fp32_ref_matmul(const struct matmul_params *params) {
     }
 }
 
-void fp32_matmul_cblas_gemm(const struct matmul_params *params) {
+inline void fp32_matmul_transposed_cblas_gemm(const struct matmul_params *params) {
     const struct matrix *A = &params->A, *B = &params->B, *C = &params->C;
     float *data_A = A->data_ptr, *data_B = B->data_ptr, *data_C = C->data_ptr;
+    float alpha = params->alpha;
 
+    assert(A->column == B->column);
+    assert(C->row == A->row);
+    assert(C->column == B->row);
+    int m = C->row, n = C->column, k = A->column;
+
+    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
+                m, n, k,
+                alpha, data_A, k,
+                data_B, k,
+                0.0f, data_C, n);
+}
+
+void MatmulOperator::mat_mul_accelerator_transposed_fastover_column(const struct matmul_params *params) {
+    // fp32_ref_matmul(params);
+    fp32_matmul_transposed_cblas_gemm(params);
+}
+
+inline void fp32_matmul_untransposed_cblas_gemm(const struct matmul_params *params) {
+    const struct matrix *A = &params->A, *B = &params->B, *C = &params->C;
+    float *data_A = A->data_ptr, *data_B = B->data_ptr, *data_C = C->data_ptr;
+
     assert(A->column == B->row);
     assert(C->row == A->row);
     assert(C->column == B->column);
-    int m = A->row, n = B->column, k = A->column;
+    int m = C->row, n = C->column, k = A->column;
 
     cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
-                m, n, k,
-                1.0f, data_A, m,
-                data_B, k,
-                0.0f, data_C, m);
+                m, n, k,
+                1.0f, data_A, k,
+                data_B, n,
+                0.0f, data_C, n);
 }
 
-void MatmulOperator::mat_mul_accelerator_transposed_fastover_column(const struct matmul_params *params) {
-    fp32_ref_matmul(params);
-    // fp32_matmul_cblas_gemm(params);
+void MatmulOperator::mat_mul_accelerator_untransposed_fastover_column(const struct matmul_params *params) {
+    fp32_matmul_untransposed_cblas_gemm(params);
 }
 
 void fp32_ref_matmul_bias(const struct matmul_params *params) {
```
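The transposed path above maps TinyChatEngine's weight layout, where B is stored row-major with shape n x k (already transposed), onto `cblas_sgemm` with `CblasTrans` and `lda = ldb = k`, `ldc = n`. Below is a minimal standalone check of that mapping against a naive triple loop; it is illustrative only, assuming any CBLAS implementation (e.g., Accelerate or OpenBLAS) and arbitrary test sizes.

```cpp
// Standalone sanity check of the row-major C = A * B^T mapping used in
// fp32_matmul_transposed_cblas_gemm. Not part of the commit.
#include <cblas.h>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const int m = 3, n = 4, k = 5;  // arbitrary test sizes
    std::vector<float> A(m * k), B(n * k), C(m * n), ref(m * n, 0.0f);
    for (int i = 0; i < m * k; ++i) A[i] = 0.01f * (i + 1);
    for (int i = 0; i < n * k; ++i) B[i] = 0.02f * (i + 1);

    // Naive reference: B is stored transposed (n x k), so
    // C[i][j] = sum_p A[i][p] * B[j][p].
    for (int i = 0; i < m; ++i)
        for (int j = 0; j < n; ++j)
            for (int p = 0; p < k; ++p)
                ref[i * n + j] += A[i * k + p] * B[j * k + p];

    // The mapping from the diff: row-major, B transposed, lda = ldb = k, ldc = n.
    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, k,
                1.0f, A.data(), k, B.data(), k, 0.0f, C.data(), n);

    float max_err = 0.0f;
    for (int i = 0; i < m * n; ++i) max_err = std::fmax(max_err, std::fabs(C[i] - ref[i]));
    std::printf("max abs diff: %g\n", max_err);  // expect ~0
    return 0;
}
```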
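The untransposed fix, meanwhile, is purely about leading dimensions: the old call passed `lda = m`, `ldb = k`, `ldc = m`, which are column-major strides, while with `CblasRowMajor` each operand's leading dimension is its stored column count (`k` for A, `n` for B, `n` for C), which is what the new call passes. A toy standalone demonstration of that indexing rule (not from the commit):

```cpp
// Why row-major leading dimensions are stored column counts: element (i, j)
// of an R x C row-major matrix lives at data[i * C + j], so the row stride
// ("leading dimension") is C. Standalone demo, not part of the commit.
#include <cstdio>

int main() {
    const int R = 2, C = 3;
    float M[R * C] = {1, 2, 3,
                      4, 5, 6};
    // Correct row stride: C. M(1, 2) is the last element, 6.
    std::printf("M(1,2) with ld=C: %g\n", M[1 * C + 2]);  // prints 6
    // Wrong stride (R, as in the old call): lands on the wrong element.
    std::printf("M(1,2) with ld=R: %g\n", M[1 * R + 2]);  // prints 5
    return 0;
}
```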
