Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rate limiter #19

Merged
merged 2 commits into from
Sep 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion bot_base/core/telegram_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,20 @@ async def run(self) -> None:
# message = await app.get_messages(chat_id, message_ids=message_id)
# return await message.download(in_memory=True)

def _check_pyrogram_tokens(self):
    """Ensure the Telegram API credentials needed by Pyrogram are configured.

    Reads ``self.config.api_id`` and ``self.config.api_hash`` and checks that
    each one yields a non-empty value from ``get_secret_value()``.

    Raises:
        ValueError: If either credential is unset (``None``) or empty.
    """
    credentials = (self.config.api_id, self.config.api_hash)
    # A credential left unset (None) is reported through the same ValueError
    # as an empty one, rather than failing with AttributeError on None.
    if not all(
        secret is not None and secret.get_secret_value()
        for secret in credentials
    ):
        raise ValueError(
            "Telegram api_id and api_hash must be provided for Pyrogram "
            "to download large files"
        )

async def download_large_file(self, chat_id, message_id, target_path=None):
# todo: troubleshoot chat_id. Only username works for now.
self._check_pyrogram_tokens()

script_path = tools_dir / "download_file_with_pyrogram.py"

# Construct command to run the download script
Expand Down Expand Up @@ -165,7 +177,6 @@ async def download_large_file(self, chat_id, message_id, target_path=None):
self.logger.debug(f"{result.stdout=}\n\n{result.stderr=}")
if target_path is None:
file_data = BytesIO(open(file_path, "rb").read())
# remove file
os.unlink(file_path)
return file_data
return file_path
Expand Down
13 changes: 11 additions & 2 deletions bot_base/utils/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
import tqdm
from pydub import AudioSegment

from bot_base.utils.gpt_utils import Audio, atranscribe_audio, transcribe_audio
from bot_base.utils.gpt_utils import (
Audio,
atranscribe_audio,
transcribe_audio,
WHISPER_RATE_LIMIT,
)

DEFAULT_PERIOD = 120 * 1000
DEFAULT_BUFFER = 5 * 1000
Expand All @@ -23,6 +28,10 @@ def split_audio(
logger = loguru.logger
chunks = []
s = 0

if len(audio) / period > WHISPER_RATE_LIMIT - 5:
period = len(audio) // (WHISPER_RATE_LIMIT - 5)

logger.debug(f"Splitting audio into chunks")
while s + period < len(audio):
chunks.append(audio[s : s + period])
Expand Down Expand Up @@ -70,5 +79,5 @@ async def split_and_transcribe_audio(
for chunk in tqdm.std.tqdm(audio_chunks):
text_chunks.append(transcribe_audio(chunk))

logger.info(f"Parsed audio", data=pprint.pformat(text_chunks))
logger.debug(f"Parsed audio", data=pprint.pformat(text_chunks))
return text_chunks
16 changes: 13 additions & 3 deletions bot_base/utils/gpt_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,15 @@
import openai
import pydub
import tiktoken
from pydub import AudioSegment
from aiolimiter import AsyncLimiter

WHISPER_RATE_LIMIT = 50 # 50 requests per minute
whisper_limiter = AsyncLimiter(WHISPER_RATE_LIMIT, 60) # 50 requests per minute
GPT_RATE_LIMIT = 200 # 200 requests per minute
gpt_limiter = AsyncLimiter(GPT_RATE_LIMIT, 60) # 200 requests per minute


# The limiters are applied inside arun_command_with_gpt and atranscribe_audio via `async with`.

token_limit_by_model = {
"gpt-3.5-turbo": 4096,
Expand Down Expand Up @@ -43,7 +51,8 @@ async def arun_command_with_gpt(command: str, data: str, model="gpt-3.5-turbo"):
{"role": "system", "content": command},
{"role": "user", "content": data},
]
response = await openai.ChatCompletion.acreate(messages=messages, model=model)
async with gpt_limiter:
response = await openai.ChatCompletion.acreate(messages=messages, model=model)
return response.choices[0].message.content


Expand All @@ -59,7 +68,8 @@ def transcribe_audio(audio: Audio, model="whisper-1"):
async def atranscribe_audio(audio: Audio, model="whisper-1"):
if isinstance(audio, str):
audio = open(audio)
result = await openai.Audio.atranscribe(model, audio)
async with whisper_limiter:
result = await openai.Audio.atranscribe(model, audio)
return result.text


Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bot-base"
version = "0.2.10"
version = "0.2.12"
description = ""
authors = ["Petr Lavrov <calmmage@gmail.com>"]
readme = "README.md"
Expand All @@ -17,6 +17,7 @@ aiogram = "*"
tiktoken = "*"
pyrogram = "^2.0.106"
tgcrypto = "^1.2.5"
aiolimiter = "^1.1.0"


[tool.poetry.group.dev.dependencies]
Expand Down