calmmage · calmmage · Sep 24, 2023 · Sep 23, 2023 · Sep 24, 2023
diff --git a/bot_base/core/telegram_bot.py b/bot_base/core/telegram_bot.py
@@ -130,8 +130,20 @@ async def run(self) -> None:
     #         message = await app.get_messages(chat_id, message_ids=message_id)
     #         return await message.download(in_memory=True)
 
+    def _check_pyrogram_tokens(self):  # guard: raise ValueError unless both Pyrogram secrets are set
+        if not (  # a falsy (e.g. empty) value for either secret fails the check
+            self.config.api_id.get_secret_value()
+            and self.config.api_hash.get_secret_value()
+        ):
+            raise ValueError(
+                "Telegram api_id and api_hash must be provided for Pyrogram "
+                "to download large files"
+            )
+
     async def download_large_file(self, chat_id, message_id, target_path=None):
         # todo: troubleshoot chat_id. Only username works for now.
+        self._check_pyrogram_tokens()
+
         script_path = tools_dir / "download_file_with_pyrogram.py"
 
         # Construct command to run the download script
@@ -165,7 +177,6 @@ async def download_large_file(self, chat_id, message_id, target_path=None):
         self.logger.debug(f"{result.stdout=}\n\n{result.stderr=}")
         if target_path is None:
             file_data = BytesIO(open(file_path, "rb").read())
-            # remove file
             os.unlink(file_path)
             return file_data
         return file_path

diff --git a/bot_base/utils/audio_utils.py b/bot_base/utils/audio_utils.py
@@ -7,7 +7,12 @@
 import tqdm
 from pydub import AudioSegment
 
-from bot_base.utils.gpt_utils import Audio, atranscribe_audio, transcribe_audio
+from bot_base.utils.gpt_utils import (
+    Audio,
+    atranscribe_audio,
+    transcribe_audio,
+    WHISPER_RATE_LIMIT,
+)
 
 DEFAULT_PERIOD = 120 * 1000
 DEFAULT_BUFFER = 5 * 1000
@@ -23,6 +28,10 @@ def split_audio(
         logger = loguru.logger
     chunks = []
     s = 0
+
+    if len(audio) / period > WHISPER_RATE_LIMIT - 5:
+        period = len(audio) // (WHISPER_RATE_LIMIT - 5)
+
     logger.debug(f"Splitting audio into chunks")
     while s + period < len(audio):
         chunks.append(audio[s : s + period])
@@ -70,5 +79,5 @@ async def split_and_transcribe_audio(
         for chunk in tqdm.std.tqdm(audio_chunks):
             text_chunks.append(transcribe_audio(chunk))
 
-    logger.info(f"Parsed audio", data=pprint.pformat(text_chunks))
+    logger.debug(f"Parsed audio", data=pprint.pformat(text_chunks))
     return text_chunks
diff --git a/bot_base/utils/gpt_utils.py b/bot_base/utils/gpt_utils.py
@@ -8,7 +8,15 @@
 import openai
 import pydub
 import tiktoken
-from pydub import AudioSegment
+from aiolimiter import AsyncLimiter
+
+WHISPER_RATE_LIMIT = 50  # 50 requests per minute
+whisper_limiter = AsyncLimiter(WHISPER_RATE_LIMIT, 60)  # 50 requests per minute
+GPT_RATE_LIMIT = 200  # 200 requests per minute
+gpt_limiter = AsyncLimiter(GPT_RATE_LIMIT, 60)  # 200 requests per minute
+
+
+# NOTE: the limiters are applied inside arun_command_with_gpt and atranscribe_audio, so callers use those functions directly.
 
 token_limit_by_model = {
     "gpt-3.5-turbo": 4096,
@@ -43,7 +51,8 @@ async def arun_command_with_gpt(command: str, data: str, model="gpt-3.5-turbo"):
         {"role": "system", "content": command},
         {"role": "user", "content": data},
     ]
-    response = await openai.ChatCompletion.acreate(messages=messages, model=model)
+    async with gpt_limiter:
+        response = await openai.ChatCompletion.acreate(messages=messages, model=model)
     return response.choices[0].message.content
 
 
@@ -59,7 +68,8 @@ def transcribe_audio(audio: Audio, model="whisper-1"):
 async def atranscribe_audio(audio: Audio, model="whisper-1"):
     if isinstance(audio, str):
         audio = open(audio)
-    result = await openai.Audio.atranscribe(model, audio)
+    async with whisper_limiter:
+        result = await openai.Audio.atranscribe(model, audio)
     return result.text
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bot-base"
-version = "0.2.10"
+version = "0.2.12"
 description = ""
 authors = ["Petr Lavrov <calmmage@gmail.com>"]
 readme = "README.md"
@@ -17,6 +17,7 @@ aiogram = "*"
 tiktoken = "*"
 pyrogram = "^2.0.106"
 tgcrypto = "^1.2.5"
+aiolimiter = "^1.1.0"
 
 
 [tool.poetry.group.dev.dependencies]