From a4b784f7a4fa6f494309f74bf2054e0199afaa0a Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Fri, 21 Feb 2025 10:58:54 +0100 Subject: [PATCH 01/16] add subscription config --- config/subscription.json | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 config/subscription.json diff --git a/config/subscription.json b/config/subscription.json new file mode 100644 index 0000000..9c16de7 --- /dev/null +++ b/config/subscription.json @@ -0,0 +1,3 @@ +{ + "channel_id": "xxx" +} \ No newline at end of file From 5211cf97591282849771376501d5c3ebf482dfe9 Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Fri, 21 Feb 2025 14:53:18 +0100 Subject: [PATCH 02/16] [wip] add channel join --- src/config/__init__.py | 4 + src/transcriberbot/blueprints/__init__.py | 2 +- src/transcriberbot/blueprints/payments.py | 28 ++++++ src/transcriberbot/bot.py | 20 ++-- values/strings.xml | 114 ++++++++++++---------- 5 files changed, 106 insertions(+), 62 deletions(-) create mode 100644 src/transcriberbot/blueprints/payments.py diff --git a/src/config/__init__.py b/src/config/__init__.py index 9986c2a..6d4084e 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -68,3 +68,7 @@ def get_document_extensions(): def get_bot_admins(): return [int(id) for id in get_config_prop("telegram")["admins"]] + + +def get_premium_join_link(): + return get_config_prop("subscription")["premium_join_link"] diff --git a/src/transcriberbot/blueprints/__init__.py b/src/transcriberbot/blueprints/__init__.py index 7fc3b7e..dc52736 100644 --- a/src/transcriberbot/blueprints/__init__.py +++ b/src/transcriberbot/blueprints/__init__.py @@ -2,4 +2,4 @@ Author: Carlo Alberto Barbano Date: 15/02/25 """ -from . import commands, messages, voice, photos, chat_handlers +from . import commands, messages, voice, photos, chat_handlers, payments diff --git a/src/transcriberbot/blueprints/payments.py b/src/transcriberbot/blueprints/payments.py new file mode 100644 index 0000000..129d30c --- /dev/null +++ b/src/transcriberbot/blueprints/payments.py @@ -0,0 +1,28 @@ +""" +Author: Carlo Alberto Barbano +Date: 20/02/25 +""" +import config +import resources as R + +from telegram import Update, InlineKeyboardMarkup, InlineKeyboardButton +from telegram.ext import ContextTypes +from database import TBDB + + +async def premium(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + premium_join_link = config.get_premium_join_link() + + keyboard = InlineKeyboardMarkup( + [[InlineKeyboardButton("Join", url=premium_join_link)]] + ) + + await update.effective_message.reply_text( + R.get_string_resource( + "premium_join_message", + TBDB.get_chat_lang(update.effective_chat.id) + ).replace("{invite_url}", premium_join_link), + reply_markup=keyboard + ) + + diff --git a/src/transcriberbot/bot.py b/src/transcriberbot/bot.py index 06c170c..ceea319 100644 --- a/src/transcriberbot/bot.py +++ b/src/transcriberbot/bot.py @@ -2,23 +2,24 @@ Author: Carlo Alberto Barbano Date: 15/02/25 """ -from telegram import Update - -import config import logging - -from telegram.ext import MessageHandler, ApplicationBuilder, CommandHandler, ContextTypes, CallbackQueryHandler, \ - ChatMemberHandler from functools import partial -from transcriberbot.blueprints import commands, messages, voice, photos, chat_handlers -from transcriberbot.blueprints.commands import set_language +from telegram import Update +from telegram.ext import MessageHandler, ApplicationBuilder, CommandHandler, CallbackQueryHandler, \ + ChatMemberHandler from telegram.ext.filters import VOICE, VIDEO_NOTE, AUDIO, PHOTO + +import config +from transcriberbot.blueprints import commands, messages, voice, photos, chat_handlers, payments +from transcriberbot.blueprints.commands import set_language from transcriberbot.filters import chat_admin, FromPrivate, AllowedDocument, BotAdmin def run(bot_token: str): application = (ApplicationBuilder() + # .base_url("https://api.telegram.org/bot{token}/test") + # .base_file_url("https://api.telegram.org/file/bot{token}/test") .token(bot_token) .concurrent_updates(True) .build()) @@ -44,7 +45,8 @@ def run(bot_token: str): 'enable_qr': commands.enable_qr, 'translate': commands.translate, 'donate': commands.donate, - 'privacy': commands.privacy + 'privacy': commands.privacy, + 'premium': payments.premium } for command, callback in chat_admin_handlers.items(): diff --git a/values/strings.xml b/values/strings.xml index cba0064..a565199 100644 --- a/values/strings.xml +++ b/values/strings.xml @@ -1,77 +1,87 @@ -Language set to {lang} -Current language: {lang} + Language set to {lang} + Current language: {lang} - -This bot transcribes audio and pictures into text. Add it to a group or forward audio messages and pictures to it. -{b}Note{/b}: Within groups, the bot will respond to commands by {b}non-anonymous admins only{/b} + + This bot transcribes audio and pictures into text. Add it to a group or forward audio messages and pictures to + it. + {b}Note{/b}: Within groups, the bot will respond to commands by {b}non-anonymous admins only{/b} -Choose a language (for voice messages): -{languages} + Choose a language (for voice messages): + {languages} -/rate this bot or leave your feedback on {a href="https://telegram.me/storebot?start=transcriber_bot"}Store Bot{/a} + /rate this bot or leave your feedback on {a href="https://telegram.me/storebot?start=transcriber_bot"}Store + Bot{/a} -If you have any trouble, or want to get in touch with the developers, contact @transcribersupport_bot + If you have any trouble, or want to get in touch with the developers, contact @transcribersupport_bot -If you like this bot, you can /donate. This will help us maintain the service and keep on improving it. Thank you! - + If you like this bot, you can /donate. This will help us maintain the service and keep on improving it. Thank + you! + -https://telegram.me/storebot?start=transcriber_bot - -You can make a donation for TranscriberBot on PayPal. This will really help us, thanks for your support! -Money from donations will be used for paying the server and backend costs and let the devs make new features! + https://telegram.me/storebot?start=transcriber_bot + + You can make a donation for TranscriberBot on PayPal. This will really help us, thanks for your support! + Money from donations will be used for paying the server and backend costs and let the devs make new features! - + -You can also donate with Bitcoin, Ethereum or Litecoin + You can also donate with Bitcoin, Ethereum or Litecoin -BTC: {code}1DTrCfoNb9RLJnR5dfJvo9zwRjBZj8j1PY{/code} + BTC: {code}1DTrCfoNb9RLJnR5dfJvo9zwRjBZj8j1PY{/code} -ETH: {code}0x0b784efc808527c75a8ef12a80622c41c28d45bd{/code} + ETH: {code}0x0b784efc808527c75a8ef12a80622c41c28d45bd{/code} -LTC: {code}LdsVPxqHR6PuKeMNvGYmMEkBRQ7M3AP3uY{/code} - + LTC: {code}LdsVPxqHR6PuKeMNvGYmMEkBRQ7M3AP3uY{/code} + -Send a voice message or a picture with some text + Send a voice message or a picture with some text -Voice enabled -Voice disabled -Photos enabled -Photos disabled -QR enabled -QR disabled + Voice enabled + Voice disabled + Photos enabled + Photos disabled + QR enabled + QR disabled -Sorry, file is too big! (limit: {0}MB) + Sorry, file is too big! (limit: {0}MB) -Transcribing... -Audio is very long ({0}s), this will take some time -{b}Text:{/b} -{b}[continues]:{/b} -Could not transcribe audio -{b}[Stopped]{/b} + Transcribing... + Audio is very long ({0}s), this will take some time + {b}Text:{/b} + {b}[continues]:{/b} + Could not transcribe audio + {b}[Stopped]{/b} -Recognizing... -{b}Recognized Text:{/b} -No text recognized -{b}QR Code found:{/b} -No QR code found + Recognizing... + {b}Recognized Text:{/b} + No text recognized + {b}QR Code found:{/b} + No QR code found -{b}WARNING:{/b} Flood detected. Stop spamming please + {b}WARNING:{/b} Flood detected. Stop spamming please -Unknown language: {language} -Please specify a language to translate to, e.g. "/translate english" -You must reply to a message in order to translate it + Unknown language: {language} + Please specify a language to translate to, e.g. "/translate english" + + You must reply to a message in order to translate it - -We do not store any personal data or media content (such as audio or pictures). -The only user data we store is the telegram chat id along with the chosen settings. -Voice/audio messages are sent to {a href="https://wit.ai/terms"}wit.ai{/a} for transcription, and are immediately deleted after the transcription is completed. -Pictures are immediately deleted from our server after OCR or QR recognition is perfomed (offline). -We do NOT store ANY message content (text, multimedia or anything else). - + + We do not store any personal data or media content (such as audio or pictures). + The only user data we store is the telegram chat id along with the chosen settings. + Voice/audio messages are sent to {a href="https://wit.ai/terms"}wit.ai{/a} for transcription, and are + immediately deleted after the transcription is completed. + Pictures are immediately deleted from our server after OCR or QR recognition is perfomed (offline). + We do NOT store ANY message content (text, multimedia or anything else). + -{b}ERROR:{/b} Oops, no wit.ai API key could be found for the language {language}. :( + {b}ERROR:{/b} Oops, no wit.ai API key could be found for the language {language}. + :( + + + To enable Transcriber Bot Premium, please join this channel: {invite_url} + From 61aa3b80bf4da92058b714f678de7f16d9f04fb5 Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sat, 22 Feb 2025 00:21:25 +0100 Subject: [PATCH 03/16] [wip] add channel subscription --- src/config/__init__.py | 3 + src/database/db.py | 2 +- src/transcriberbot/blueprints/payments.py | 17 ++- src/transcriberbot/blueprints/voice.py | 13 ++- src/transcriberbot/filters/filters.py | 23 +++- values/strings.xml | 124 +++++++++++----------- 6 files changed, 112 insertions(+), 70 deletions(-) diff --git a/src/config/__init__.py b/src/config/__init__.py index 6d4084e..86dc11e 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -72,3 +72,6 @@ def get_bot_admins(): def get_premium_join_link(): return get_config_prop("subscription")["premium_join_link"] + +def get_premium_chat_id(): + return get_config_prop("subscription")["channel_id"] diff --git a/src/database/db.py b/src/database/db.py index a928ab0..4e585dd 100644 --- a/src/database/db.py +++ b/src/database/db.py @@ -95,7 +95,7 @@ def get_chat_voice_enabled(chat_id): with TBDB._get_db() as db: c = db.execute("SELECT voice_enabled FROM chats WHERE chat_id='{0}'".format(chat_id)) return c.fetchone()[0] - except TypeError as e: + except Exception as e: logger.error("Error getting voice_enabled for chat %d: %s", chat_id, e) raise e diff --git a/src/transcriberbot/blueprints/payments.py b/src/transcriberbot/blueprints/payments.py index 129d30c..04d2bd3 100644 --- a/src/transcriberbot/blueprints/payments.py +++ b/src/transcriberbot/blueprints/payments.py @@ -8,6 +8,7 @@ from telegram import Update, InlineKeyboardMarkup, InlineKeyboardButton from telegram.ext import ContextTypes from database import TBDB +from transcriberbot.filters import is_premium_user async def premium(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: @@ -17,12 +18,18 @@ async def premium(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: [[InlineKeyboardButton("Join", url=premium_join_link)]] ) + premium_join_message = R.get_string_resource( + 'premium_join_message', + TBDB.get_chat_lang(update.effective_chat.id) + ).replace('{invite_url}', premium_join_link) + + current_plan = R.get_string_resource("current_plan_free") + if await is_premium_user(update, context): + current_plan = R.get_string_resource("current_plan_premium") + await update.effective_message.reply_text( - R.get_string_resource( - "premium_join_message", - TBDB.get_chat_lang(update.effective_chat.id) - ).replace("{invite_url}", premium_join_link), - reply_markup=keyboard + f"{premium_join_message}\n\n{current_plan}", + reply_markup=keyboard, parse_mode="html" ) diff --git a/src/transcriberbot/blueprints/voice.py b/src/transcriberbot/blueprints/voice.py index 7c5bf90..239fefe 100644 --- a/src/transcriberbot/blueprints/voice.py +++ b/src/transcriberbot/blueprints/voice.py @@ -18,6 +18,7 @@ import config import resources as R from database import TBDB +from transcriberbot.filters import is_premium_user logger = logging.getLogger(__name__) @@ -94,6 +95,9 @@ async def run_voice_task(update: Update, context: ContextTypes.DEFAULT_TYPE, med async def process_media_voice(update: Update, context: ContextTypes.DEFAULT_TYPE, media: [Voice | VideoNote | Document], name: str) -> None: + print("Update:", update) + print("Effective user:", update.effective_user) + chat_id = update.effective_chat.id file_size = media.file_size max_size = config.get_config_prop("app").get("max_media_voice_file_size", 20 * 1024 * 1024) @@ -119,6 +123,12 @@ async def process_media_voice(update: Update, context: ContextTypes.DEFAULT_TYPE os.remove(file_path) +async def get_backend(update: Update, context: ContextTypes.DEFAULT_TYPE): + backend = "wit" + if await is_premium_user(update, context): + backend = "whisper" + return backend + async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TYPE, path: str): chat_id = update.effective_chat.id task_id = update.effective_message.message_id @@ -136,8 +146,9 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY logger.debug("Using key %s for lang %s", api_key, lang) + backend = await get_backend(update, context) message = await context.bot.send_message( - chat_id, R.get_string_resource("transcribing", lang), parse_mode="html", + chat_id, f"{R.get_string_resource('transcribing', lang)} ({backend})", parse_mode="html", reply_to_message_id=update.effective_message.message_id ) diff --git a/src/transcriberbot/filters/filters.py b/src/transcriberbot/filters/filters.py index bc89d30..2a279ac 100644 --- a/src/transcriberbot/filters/filters.py +++ b/src/transcriberbot/filters/filters.py @@ -5,7 +5,7 @@ import logging import asyncio -from telegram.constants import ChatType +from telegram.constants import ChatType, ChatMemberStatus from telegram.ext import ContextTypes from telegram.ext.filters import UpdateFilter from telegram import Update, ChatMember @@ -86,6 +86,27 @@ async def chat_admin(update: Update, context: ContextTypes.DEFAULT_TYPE, callbac return await callback(update, context) +async def is_premium_user(update: Update, context: ContextTypes.DEFAULT_TYPE): + print("is_premium_user: update.effective_user", update.effective_user) + user = (update.effective_user or update.channel_post.from_user) + + # does not support anonymous channels + if user is None: + return False + + user_id = user.id + + premium_channel = await context.bot.get_chat(config.get_premium_chat_id()) + + try: + member = await premium_channel.get_member(user_id) + print("Member:", member) + return member.status in (ChatMemberStatus.MEMBER, ChatMemberStatus.ADMINISTRATOR, ChatMemberStatus.OWNER) + except Exception as e: + logging.error("User %d is not a premium user", user_id, exc_info=True) + return False + + class BotAdmin(UpdateFilter): """ Checks if the message was sent by the bot admin. diff --git a/values/strings.xml b/values/strings.xml index a565199..31f4d3b 100644 --- a/values/strings.xml +++ b/values/strings.xml @@ -1,87 +1,87 @@ - Language set to {lang} - Current language: {lang} +Language set to {lang} +Current language: {lang} - - This bot transcribes audio and pictures into text. Add it to a group or forward audio messages and pictures to - it. - {b}Note{/b}: Within groups, the bot will respond to commands by {b}non-anonymous admins only{/b} + +This bot transcribes audio and pictures into text. Add it to a group or forward audio messages and pictures to it. - Choose a language (for voice messages): - {languages} +{b}Note{/b}: Within groups, the bot will respond to commands by {b}non-anonymous admins only{/b} - /rate this bot or leave your feedback on {a href="https://telegram.me/storebot?start=transcriber_bot"}Store - Bot{/a} +Choose a language (for voice messages): +{languages} - If you have any trouble, or want to get in touch with the developers, contact @transcribersupport_bot +/rate this bot or leave your feedback on {a href="https://telegram.me/storebot?start=transcriber_bot"}Store Bot{/a} - If you like this bot, you can /donate. This will help us maintain the service and keep on improving it. Thank - you! - +If you have any trouble, or want to get in touch with the developers, contact @transcribersupport_bot - https://telegram.me/storebot?start=transcriber_bot - - You can make a donation for TranscriberBot on PayPal. This will really help us, thanks for your support! - Money from donations will be used for paying the server and backend costs and let the devs make new features! +If you like this bot, you can /donate. This will help us maintain the service and keep on improving it. Thank you! + - +https://telegram.me/storebot?start=transcriber_bot + +You can make a donation for TranscriberBot on PayPal. This will really help us, thanks for your support! +Money from donations will be used for paying the server and backend costs and let the devs make new features! - You can also donate with Bitcoin, Ethereum or Litecoin + - BTC: {code}1DTrCfoNb9RLJnR5dfJvo9zwRjBZj8j1PY{/code} +You can also donate with Bitcoin, Ethereum or Litecoin - ETH: {code}0x0b784efc808527c75a8ef12a80622c41c28d45bd{/code} +BTC: {code}1DTrCfoNb9RLJnR5dfJvo9zwRjBZj8j1PY{/code} - LTC: {code}LdsVPxqHR6PuKeMNvGYmMEkBRQ7M3AP3uY{/code} - +ETH: {code}0x0b784efc808527c75a8ef12a80622c41c28d45bd{/code} - Send a voice message or a picture with some text +LTC: {code}LdsVPxqHR6PuKeMNvGYmMEkBRQ7M3AP3uY{/code} + - Voice enabled - Voice disabled - Photos enabled - Photos disabled - QR enabled - QR disabled +Send a voice message or a picture with some text - Sorry, file is too big! (limit: {0}MB) +Voice enabled +Voice disabled +Photos enabled +Photos disabled +QR enabled +QR disabled - Transcribing... - Audio is very long ({0}s), this will take some time - {b}Text:{/b} - {b}[continues]:{/b} - Could not transcribe audio - {b}[Stopped]{/b} +Sorry, file is too big! (limit: {0}MB) - Recognizing... - {b}Recognized Text:{/b} - No text recognized - {b}QR Code found:{/b} - No QR code found +Transcribing... +Audio is very long ({0}s), this will take some time +{b}Text:{/b} +{b}[continues]:{/b} +Could not transcribe audio +{b}[Stopped]{/b} - {b}WARNING:{/b} Flood detected. Stop spamming please +Recognizing... +{b}Recognized Text:{/b} +No text recognized +{b}QR Code found:{/b} +No QR code found - Unknown language: {language} - Please specify a language to translate to, e.g. "/translate english" - - You must reply to a message in order to translate it +{b}WARNING:{/b} Flood detected. Stop spamming please - - We do not store any personal data or media content (such as audio or pictures). - The only user data we store is the telegram chat id along with the chosen settings. - Voice/audio messages are sent to {a href="https://wit.ai/terms"}wit.ai{/a} for transcription, and are - immediately deleted after the transcription is completed. - Pictures are immediately deleted from our server after OCR or QR recognition is perfomed (offline). - We do NOT store ANY message content (text, multimedia or anything else). - +Unknown language: {language} +Please specify a language to translate to, e.g. "/translate english" + +You must reply to a message in order to translate it - {b}ERROR:{/b} Oops, no wit.ai API key could be found for the language {language}. - :( - + +We do not store any personal data or media content (such as audio or pictures). +The only user data we store is the telegram chat id along with the chosen settings. +Voice/audio messages are sent to {a href="https://wit.ai/terms"}wit.ai{/a} for transcription, and are +immediately deleted after the transcription is completed. +Pictures are immediately deleted from our server after OCR or QR recognition is perfomed (offline). +We do NOT store ANY message content (text, multimedia or anything else). + - - To enable Transcriber Bot Premium, please join this channel: {invite_url} - +{b}ERROR:{/b} Oops, no wit.ai API key could be found for the language {language}. +:( + + + +To enable Transcriber Bot Premium, please join this channel: {invite_url} + +{b}Current plan:{/b} Free +{b}Current plan:{/b} Premium From 806e797537edcec4099cf3e6977fd0dbc31b6fa5 Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sat, 22 Feb 2025 00:22:01 +0100 Subject: [PATCH 04/16] update .gitignore --- .gitignore | 1 + .python-version | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 .python-version diff --git a/.gitignore b/.gitignore index 39ace71..f756563 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # TranscriberBot-specific ignores media/ +.python-version # Generic data-related ignores *.csv diff --git a/.python-version b/.python-version deleted file mode 100644 index 275cfdd..0000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -transcriber-bot-wonda From 9f56ff2cd6ee145a893ed6102bd06feb85d96dec Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:22:01 +0100 Subject: [PATCH 05/16] check response status code --- src/audiotools/speech.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/audiotools/speech.py b/src/audiotools/speech.py index 188bf8b..28acf2b 100644 --- a/src/audiotools/speech.py +++ b/src/audiotools/speech.py @@ -99,6 +99,9 @@ async def transcribe_wit(path, api_key): async def transcribe_whisper(path): resp = requests.get(f"{config.get_config_prop('app')['whisper']['api_endpoint']}/transcribe?file_id={path}") + if resp.status_code != 200: + raise ValueError(f"Error transcribing audio: {resp.text}") + # split the response into chunks of 4000 characters chunks = textwrap.wrap(resp.text, 4000) for idx, chunk in enumerate(chunks): From 086643b755b0ddb4dff8b97616a27135d6586b0c Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:22:18 +0100 Subject: [PATCH 06/16] remove prints --- src/transcriberbot/filters/filters.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/transcriberbot/filters/filters.py b/src/transcriberbot/filters/filters.py index 2a279ac..fb3d7a0 100644 --- a/src/transcriberbot/filters/filters.py +++ b/src/transcriberbot/filters/filters.py @@ -87,7 +87,6 @@ async def chat_admin(update: Update, context: ContextTypes.DEFAULT_TYPE, callbac async def is_premium_user(update: Update, context: ContextTypes.DEFAULT_TYPE): - print("is_premium_user: update.effective_user", update.effective_user) user = (update.effective_user or update.channel_post.from_user) # does not support anonymous channels @@ -100,10 +99,8 @@ async def is_premium_user(update: Update, context: ContextTypes.DEFAULT_TYPE): try: member = await premium_channel.get_member(user_id) - print("Member:", member) return member.status in (ChatMemberStatus.MEMBER, ChatMemberStatus.ADMINISTRATOR, ChatMemberStatus.OWNER) except Exception as e: - logging.error("User %d is not a premium user", user_id, exc_info=True) return False From 03a36347fb8a2b271ae99aac54a173a6118549c5 Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:22:41 +0100 Subject: [PATCH 07/16] test backend --- src/transcriberbot/blueprints/voice.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/transcriberbot/blueprints/voice.py b/src/transcriberbot/blueprints/voice.py index 239fefe..574bb08 100644 --- a/src/transcriberbot/blueprints/voice.py +++ b/src/transcriberbot/blueprints/voice.py @@ -7,6 +7,7 @@ import os import traceback import datetime +import audioread from asyncio import CancelledError import telegram @@ -123,10 +124,23 @@ async def process_media_voice(update: Update, context: ContextTypes.DEFAULT_TYPE os.remove(file_path) -async def get_backend(update: Update, context: ContextTypes.DEFAULT_TYPE): +def get_duration(update: Update, path: str): + media = (update.effective_message.voice or update.effective_message.audio or + update.effective_message.video_note or update.effective_message.video) + if media is not None: + return media.duration + + with audioread.audio_open(path) as f: + return f.duration + + +async def get_backend(update: Update, context: ContextTypes.DEFAULT_TYPE, path): backend = "wit" if await is_premium_user(update, context): - backend = "whisper" + logging.info("User is premium") + duration = get_duration(update, path) + if duration <= config.get_config_prop("app")["whisper"]["max_duration"]: + backend = "whisper" return backend async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TYPE, path: str): @@ -146,9 +160,9 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY logger.debug("Using key %s for lang %s", api_key, lang) - backend = await get_backend(update, context) + backend = await get_backend(update, context, path) message = await context.bot.send_message( - chat_id, f"{R.get_string_resource('transcribing', lang)} ({backend})", parse_mode="html", + chat_id, f"{R.get_string_resource('transcribing', lang)} ({backend}, lang: {lang})", parse_mode="html", reply_to_message_id=update.effective_message.message_id ) @@ -162,7 +176,7 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY text = R.get_string_resource("transcription_text", lang) + "\n" try: - async for idx, speech, n_chunks in audiotools.transcribe(path, api_key): + async for idx, speech, n_chunks in audiotools.transcribe(path, api_key, backend=backend): logging.debug(f"Transcription idx={idx} n_chunks={n_chunks}, text={speech}") suffix = f" [{idx + 1}/{n_chunks}]" if idx < n_chunks - 1 else "" reply_markup = keyboard if idx < n_chunks - 1 else None From bebf7d62a578870c481835cdce36e80018dfc56f Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:22:45 +0100 Subject: [PATCH 08/16] update requirements --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8b8bc35..c6483f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ tesserocr pydub zbarlight requests -sentry-sdk \ No newline at end of file +sentry-sdk +audioread \ No newline at end of file From 25681fa7c48c8a7341d3435fd5a5f8e391706486 Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:28:51 +0100 Subject: [PATCH 09/16] update json template --- config/subscription.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/subscription.json b/config/subscription.json index 9c16de7..72bbe27 100644 --- a/config/subscription.json +++ b/config/subscription.json @@ -1,3 +1,4 @@ { - "channel_id": "xxx" + "channel_id": "", + "premium_join_link": "xxx" } \ No newline at end of file From e241cc1e7a9315ed17c61b9f3bfe0eb188e46e1a Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:31:11 +0100 Subject: [PATCH 10/16] update event branches --- .github/workflows/build_image.yml | 3 +- .github/workflows/docker_build_push.yml | 40 ------------------------- 2 files changed, 1 insertion(+), 42 deletions(-) delete mode 100644 .github/workflows/docker_build_push.yml diff --git a/.github/workflows/build_image.yml b/.github/workflows/build_image.yml index 475763b..c0c8a9b 100644 --- a/.github/workflows/build_image.yml +++ b/.github/workflows/build_image.yml @@ -4,8 +4,7 @@ on: push: branches: - master - - developement - - ptb-async + - development env: IMAGE_NAME: transcriberbot diff --git a/.github/workflows/docker_build_push.yml b/.github/workflows/docker_build_push.yml deleted file mode 100644 index 230d48f..0000000 --- a/.github/workflows/docker_build_push.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: DockerBuildAndPush - -on: - push: - branches: - - master - - developement - -env: - IMAGE_NAME: transcriberbot - -jobs: - push: - runs-on: ubuntu-latest - if: github.event_name == 'push' - - steps: - - uses: actions/checkout@v2 - - - name: Login to ghcr registry - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin - - - name: Build image - run: docker build . --file Dockerfile --tag $IMAGE_NAME - - - name: Push image - run: | - IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME - # Change all uppercase to lowercase - IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') - # Strip git ref prefix from version - VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') - # Strip "v" prefix from tag name - [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') - # Use Docker `latest` tag convention - [ "$VERSION" == "master" ] && VERSION=latest - echo IMAGE_ID=$IMAGE_ID - echo VERSION=$VERSION - docker tag $IMAGE_NAME $IMAGE_ID:$VERSION - docker push $IMAGE_ID:$VERSION From 004140122ebe31fac7590e55f46ad9ca33ed15f8 Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:31:22 +0100 Subject: [PATCH 11/16] update tag name --- run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run.sh b/run.sh index 0fe85f7..5372365 100755 --- a/run.sh +++ b/run.sh @@ -1,6 +1,6 @@ #!/bin/sh -docker pull ghcr.io/charslab/transcriberbot:ptb-async +docker pull ghcr.io/charslab/transcriberbot:development docker run \ -e LC_ALL=C \ -d --restart unless-stopped \ @@ -12,4 +12,4 @@ docker run \ --cpus=4.0 \ --memory=3000m \ -u "$(id -u):1337" \ - ghcr.io/charslab/transcriberbot:ptb-async \ No newline at end of file + ghcr.io/charslab/transcriberbot:development \ No newline at end of file From aed79acef43abb56b65b9fa29b7210a5210efbb7 Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:32:21 +0100 Subject: [PATCH 12/16] remove action --- .github/workflows/docker_build_push.yml | 40 ------------------------- 1 file changed, 40 deletions(-) delete mode 100644 .github/workflows/docker_build_push.yml diff --git a/.github/workflows/docker_build_push.yml b/.github/workflows/docker_build_push.yml deleted file mode 100644 index 230d48f..0000000 --- a/.github/workflows/docker_build_push.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: DockerBuildAndPush - -on: - push: - branches: - - master - - developement - -env: - IMAGE_NAME: transcriberbot - -jobs: - push: - runs-on: ubuntu-latest - if: github.event_name == 'push' - - steps: - - uses: actions/checkout@v2 - - - name: Login to ghcr registry - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin - - - name: Build image - run: docker build . --file Dockerfile --tag $IMAGE_NAME - - - name: Push image - run: | - IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME - # Change all uppercase to lowercase - IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') - # Strip git ref prefix from version - VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') - # Strip "v" prefix from tag name - [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') - # Use Docker `latest` tag convention - [ "$VERSION" == "master" ] && VERSION=latest - echo IMAGE_ID=$IMAGE_ID - echo VERSION=$VERSION - docker tag $IMAGE_NAME $IMAGE_ID:$VERSION - docker push $IMAGE_ID:$VERSION From fb45951f0468ec3f2f465ec8c7e1506ad3fe68ed Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:42:20 +0100 Subject: [PATCH 13/16] remove logging from db.__exit__ --- src/database/db.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/database/db.py b/src/database/db.py index a928ab0..33437e2 100644 --- a/src/database/db.py +++ b/src/database/db.py @@ -33,13 +33,6 @@ def assoc(self): def __exit__(self, exc_type, exc_value, exc_traceback): logger.debug("__exit__") self.__close() - - if exc_type: - logger.error("exc_type: {}".format(exc_type)) - logger.error("exc_value: {}".format(exc_value)) - logger.error("exc_traceback: {}".format(exc_traceback)) - logger.error("Caught exception", exc_info=True) - return True def execute(self, query, *args): From 84e1248337b67e603876059f709fc48a8bafdefe Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Sun, 23 Feb 2025 18:44:25 +0100 Subject: [PATCH 14/16] update action name --- .github/workflows/build_image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_image.yml b/.github/workflows/build_image.yml index c0c8a9b..7abb768 100644 --- a/.github/workflows/build_image.yml +++ b/.github/workflows/build_image.yml @@ -1,4 +1,4 @@ -name: DockerBuildAndPush +name: Build Image on: push: From 00dbfef6fa4f4083eaa3b660f5536c5576a71e53 Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Tue, 25 Feb 2025 16:15:33 +0100 Subject: [PATCH 15/16] fix sync request --- src/audiotools/speech.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/audiotools/speech.py b/src/audiotools/speech.py index 28acf2b..84010ec 100644 --- a/src/audiotools/speech.py +++ b/src/audiotools/speech.py @@ -97,7 +97,12 @@ async def transcribe_wit(path, api_key): async def transcribe_whisper(path): - resp = requests.get(f"{config.get_config_prop('app')['whisper']['api_endpoint']}/transcribe?file_id={path}") + loop = asyncio.get_event_loop() + resp = await loop.run_in_executor( + None, + partial(requests.get, + url=f"{config.get_config_prop('app')['whisper']['api_endpoint']}/transcribe?file_id={path}") + ) if resp.status_code != 200: raise ValueError(f"Error transcribing audio: {resp.text}") From 45810a14818e128b0fa066f05cbd616b572c2a48 Mon Sep 17 00:00:00 2001 From: Carlo Alberto Barbano Date: Thu, 16 Oct 2025 23:51:30 +0200 Subject: [PATCH 16/16] add wit fallback --- src/transcriberbot/blueprints/voice.py | 58 +++++++++----------------- 1 file changed, 19 insertions(+), 39 deletions(-) diff --git a/src/transcriberbot/blueprints/voice.py b/src/transcriberbot/blueprints/voice.py index 574bb08..cb38bbf 100644 --- a/src/transcriberbot/blueprints/voice.py +++ b/src/transcriberbot/blueprints/voice.py @@ -143,7 +143,7 @@ async def get_backend(update: Update, context: ContextTypes.DEFAULT_TYPE, path): backend = "whisper" return backend -async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TYPE, path: str): +async def run_transcription(update: Update, context: ContextTypes.DEFAULT_TYPE, path: str, backend: str): chat_id = update.effective_chat.id task_id = update.effective_message.message_id lang = TBDB.get_chat_lang(chat_id) @@ -160,7 +160,6 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY logger.debug("Using key %s for lang %s", api_key, lang) - backend = await get_backend(update, context, path) message = await context.bot.send_message( chat_id, f"{R.get_string_resource('transcribing', lang)} ({backend}, lang: {lang})", parse_mode="html", reply_to_message_id=update.effective_message.message_id @@ -197,43 +196,6 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY text = f"{text} {speech}" - # retry_num = 0 - # retry = True - # while retry: # Retry loop - # try: - # if len(text + " " + speech) >= 4000: - # text = R.get_string_resource("transcription_continues", lang) + "\n" - # message = await context.bot.send_message( - # chat_id, f"{text} {speech} {suffix}", - # reply_to_message_id=message.message_id, parse_mode="html", - # reply_markup=keyboard - # ) - # else: - # message = await context.bot.edit_message_text( - # f"{text} {speech} {suffix}", chat_id=chat_id, - # message_id=message.message_id, parse_mode="html", - # reply_markup=keyboard - # ) - # - # text += " " + speech - # retry = False - # - # except telegram.error.TimedOut as e: - # print(e) - # logger.error("Timeout error %s", traceback.format_exc()) - # retry_num += 1 - # if retry_num >= 3: - # retry = False - # - # except telegram.error.RetryAfter as r: - # logger.warning("Retrying after %d", r.retry_after) - # await asyncio.sleep(r.retry_after) - # - # except telegram.error.TelegramError: - # logger.error("Telegram error %s", traceback.format_exc()) - # retry = False - - except CancelledError: logging.debug("Task cancelled") await context.bot.edit_message_text( @@ -251,3 +213,21 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY ) raise e + +async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TYPE, path: str): + backend = await get_backend(update, context, path) + + # try running transcription, if it fails with whisper, try wit + try: + await run_transcription(update, context, path, backend) + except Exception as e: + if backend == "whisper": + logger.error("Whisper transcription failed, falling back to wit", exc_info=True) + try: + await run_transcription(update, context, path, "wit") + except Exception as e2: + logger.error("Wit transcription also failed", exc_info=True) + raise e2 + else: + raise e +