diff --git a/.github/workflows/build_image.yml b/.github/workflows/build_image.yml index 475763b..7abb768 100644 --- a/.github/workflows/build_image.yml +++ b/.github/workflows/build_image.yml @@ -1,11 +1,10 @@ -name: DockerBuildAndPush +name: Build Image on: push: branches: - master - - developement - - ptb-async + - development env: IMAGE_NAME: transcriberbot diff --git a/.github/workflows/docker_build_push.yml b/.github/workflows/docker_build_push.yml deleted file mode 100644 index 230d48f..0000000 --- a/.github/workflows/docker_build_push.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: DockerBuildAndPush - -on: - push: - branches: - - master - - developement - -env: - IMAGE_NAME: transcriberbot - -jobs: - push: - runs-on: ubuntu-latest - if: github.event_name == 'push' - - steps: - - uses: actions/checkout@v2 - - - name: Login to ghcr registry - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin - - - name: Build image - run: docker build . --file Dockerfile --tag $IMAGE_NAME - - - name: Push image - run: | - IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME - # Change all uppercase to lowercase - IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') - # Strip git ref prefix from version - VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') - # Strip "v" prefix from tag name - [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') - # Use Docker `latest` tag convention - [ "$VERSION" == "master" ] && VERSION=latest - echo IMAGE_ID=$IMAGE_ID - echo VERSION=$VERSION - docker tag $IMAGE_NAME $IMAGE_ID:$VERSION - docker push $IMAGE_ID:$VERSION diff --git a/.gitignore b/.gitignore index 39ace71..f756563 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # TranscriberBot-specific ignores media/ +.python-version # Generic data-related ignores *.csv diff --git a/.python-version b/.python-version deleted file mode 100644 index 275cfdd..0000000 --- a/.python-version +++ /dev/null @@ -1 
+0,0 @@ -transcriber-bot-wonda diff --git a/config/subscription.json b/config/subscription.json new file mode 100644 index 0000000..72bbe27 --- /dev/null +++ b/config/subscription.json @@ -0,0 +1,4 @@ +{ + "channel_id": "", + "premium_join_link": "xxx" +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 8b8bc35..c6483f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ tesserocr pydub zbarlight requests -sentry-sdk \ No newline at end of file +sentry-sdk +audioread \ No newline at end of file diff --git a/run.sh b/run.sh index 0fe85f7..5372365 100755 --- a/run.sh +++ b/run.sh @@ -1,6 +1,6 @@ #!/bin/sh -docker pull ghcr.io/charslab/transcriberbot:ptb-async +docker pull ghcr.io/charslab/transcriberbot:development docker run \ -e LC_ALL=C \ -d --restart unless-stopped \ @@ -12,4 +12,4 @@ docker run \ --cpus=4.0 \ --memory=3000m \ -u "$(id -u):1337" \ - ghcr.io/charslab/transcriberbot:ptb-async \ No newline at end of file + ghcr.io/charslab/transcriberbot:development \ No newline at end of file diff --git a/src/audiotools/speech.py b/src/audiotools/speech.py index 188bf8b..84010ec 100644 --- a/src/audiotools/speech.py +++ b/src/audiotools/speech.py @@ -97,7 +97,15 @@ async def transcribe_wit(path, api_key): async def transcribe_whisper(path): - resp = requests.get(f"{config.get_config_prop('app')['whisper']['api_endpoint']}/transcribe?file_id={path}") + loop = asyncio.get_event_loop() + resp = await loop.run_in_executor( + None, + partial(requests.get, + url=f"{config.get_config_prop('app')['whisper']['api_endpoint']}/transcribe?file_id={path}") + ) + + if resp.status_code != 200: + raise ValueError(f"Error transcribing audio: {resp.text}") # split the response into chunks of 4000 characters chunks = textwrap.wrap(resp.text, 4000) diff --git a/src/config/__init__.py b/src/config/__init__.py index 9986c2a..86dc11e 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -68,3 +68,10 @@ def 
def get_bot_admins():
    """Return the bot admin ids from the ``telegram`` config section, as ints."""
    configured_admins = get_config_prop("telegram")["admins"]
    return list(map(int, configured_admins))


def get_premium_join_link():
    """Return the ``premium_join_link`` value of the ``subscription`` config section."""
    subscription = get_config_prop("subscription")
    return subscription["premium_join_link"]


def get_premium_chat_id():
    """Return the ``channel_id`` value of the ``subscription`` config section."""
    subscription = get_config_prop("subscription")
    return subscription["channel_id"]
async def premium(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Reply with the premium join link and the caller's current plan.

    The reply consists of the ``premium_join_message`` string (with
    ``{invite_url}`` replaced by the configured join link), a blank line and
    either the ``current_plan_premium`` or ``current_plan_free`` string,
    depending on ``is_premium_user``. An inline "Join" button pointing at the
    same link is attached.

    :param update: incoming Telegram update; its effective chat selects the
        language and its effective message is the one replied to.
    :param context: handler context, forwarded to ``is_premium_user``.
    """
    # Resolve the chat language once so that both halves of the reply are
    # looked up in the same language.
    lang = TBDB.get_chat_lang(update.effective_chat.id)
    premium_join_link = config.get_premium_join_link()

    keyboard = InlineKeyboardMarkup(
        [[InlineKeyboardButton("Join", url=premium_join_link)]]
    )

    premium_join_message = R.get_string_resource(
        'premium_join_message', lang
    ).replace('{invite_url}', premium_join_link)

    if await is_premium_user(update, context):
        plan_key = "current_plan_premium"
    else:
        plan_key = "current_plan_free"
    current_plan = R.get_string_resource(plan_key, lang)

    await update.effective_message.reply_text(
        f"{premium_join_message}\n\n{current_plan}",
        reply_markup=keyboard, parse_mode="html"
    )
def get_duration(update: Update, path: str):
    """Return the duration of the media attached to ``update``.

    The duration stored on the message's voice, audio, video note or video
    attachment is preferred. When the message carries none of those, the file
    at ``path`` is opened with ``audioread`` and its duration is returned
    instead.

    :param update: incoming Telegram update whose effective message is inspected.
    :param path: local path of the downloaded media file.
    :return: the media duration (presumably in seconds for both sources --
        TODO confirm against the Telegram and audioread docs).
    :raises Exception: whatever ``audioread.audio_open`` raises when the file
        cannot be opened or decoded.
    """
    message = update.effective_message
    media = message.voice or message.audio or message.video_note or message.video
    if media is not None:
        return media.duration

    # No Telegram-side metadata available: probe the file itself.
    with audioread.audio_open(path) as audio_file:
        return audio_file.duration


async def get_backend(update: Update, context: ContextTypes.DEFAULT_TYPE, path):
    """Choose the transcription backend for this update.

    ``"whisper"`` is selected only for premium users whose media is not longer
    than ``app.whisper.max_duration`` from the config; everything else uses
    ``"wit"``.

    If the duration cannot be determined or the whisper limit is not
    configured, the function falls back to ``"wit"`` instead of raising, so
    that a problem in backend selection never prevents a transcription.

    :param update: incoming Telegram update.
    :param context: handler context, forwarded to ``is_premium_user``.
    :param path: local path of the downloaded media file.
    :return: ``"whisper"`` or ``"wit"``.
    """
    if not await is_premium_user(update, context):
        return "wit"

    logger.info("User is premium")
    try:
        duration = get_duration(update, path)
        max_duration = config.get_config_prop("app")["whisper"]["max_duration"]
    except Exception:
        # Selection is best-effort: log the reason and use the default backend.
        logger.warning("Could not evaluate whisper eligibility, using wit", exc_info=True)
        return "wit"

    return "whisper" if duration <= max_duration else "wit"
async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TYPE, path: str):
    """Transcribe the file at ``path`` with the backend chosen by ``get_backend``.

    When the chosen backend is ``"whisper"`` and the transcription raises, the
    failure is logged and the transcription is attempted once more with
    ``"wit"``. Failures of any other backend, and a failure of the wit retry,
    are re-raised to the caller.
    """
    selected_backend = await get_backend(update, context, path)

    try:
        await run_transcription(update, context, path, selected_backend)
    except Exception:
        # Only whisper has a fallback; everything else propagates unchanged.
        if selected_backend != "whisper":
            raise

        logger.error("Whisper transcription failed, falling back to wit", exc_info=True)
        try:
            await run_transcription(update, context, path, "wit")
        except Exception:
            logger.error("Wit transcription also failed", exc_info=True)
            raise
async def is_premium_user(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Tell whether the sender of ``update`` belongs to the premium channel.

    The sender is ``update.effective_user`` or, failing that, the author of
    the channel post. The sender counts as premium when their status in the
    chat returned by ``config.get_premium_chat_id()`` is member, administrator
    or owner.

    The check fails closed: when there is no identifiable sender, when no
    premium channel is configured, or when the membership lookup raises, the
    result is ``False``.

    :param update: incoming Telegram update.
    :param context: handler context providing the bot used for the lookup.
    :return: ``True`` if the sender is a member of the premium channel.
    """
    user = update.effective_user
    # channel_post is None for every update that is not a channel post, so it
    # must be checked before reading from_user.
    if user is None and update.channel_post is not None:
        user = update.channel_post.from_user

    # does not support anonymous channels
    if user is None:
        return False

    try:
        premium_chat_id = config.get_premium_chat_id()
        if not premium_chat_id:
            # No premium channel configured: nobody can be premium.
            return False
        # A single lookup is enough; fetching the chat object first is not needed.
        member = await context.bot.get_chat_member(premium_chat_id, user.id)
    except Exception as exc:
        logging.getLogger(__name__).warning(
            "Could not check premium membership for user %s: %s", user.id, exc
        )
        return False

    return member.status in (
        ChatMemberStatus.MEMBER,
        ChatMemberStatus.ADMINISTRATOR,
        ChatMemberStatus.OWNER,
    )
"/translate english" + You must reply to a message in order to translate it We do not store any personal data or media content (such as audio or pictures). The only user data we store is the telegram chat id along with the chosen settings. -Voice/audio messages are sent to {a href="https://wit.ai/terms"}wit.ai{/a} for transcription, and are immediately deleted after the transcription is completed. +Voice/audio messages are sent to {a href="https://wit.ai/terms"}wit.ai{/a} for transcription, and are +immediately deleted after the transcription is completed. Pictures are immediately deleted from our server after OCR or QR recognition is perfomed (offline). We do NOT store ANY message content (text, multimedia or anything else). -{b}ERROR:{/b} Oops, no wit.ai API key could be found for the language {language}. :( +{b}ERROR:{/b} Oops, no wit.ai API key could be found for the language {language}. +:( + + +To enable Transcriber Bot Premium, please join this channel: {invite_url} + +{b}Current plan:{/b} Free +{b}Current plan:{/b} Premium