Python script for downloading Telegram videos

Recently I needed a way to download all the videos from a Telegram channel that I came across, and I had no intention of opening the web version and clicking the download button for hundreds of videos one by one.

So I wrote a script to do it for me.

The script has one prerequisite: you need a pair of Telegram API keys. Here is how to get them:

Getting your API keys (API ID / hash pair):

  • Visit https://my.telegram.org/apps and log in with your Telegram Account.
  • Fill out the form to register a new Telegram application. Done! The API key consists of two parts: api_id and api_hash.
import os
import sys
import asyncio
import time
from telethon import TelegramClient, errors
from telethon.tl.types import InputMessagesFilterVideo
import logging

# Telegram API credentials (placeholders): replace them with the
# api_id / api_hash pair obtained from https://my.telegram.org/apps.
api_id = 'xxx'
api_hash = 'xxxx'
# Numeric channel ID kept as a string; download_videos() converts it with int().
channel_username = '-100xxxx'
# Directory the videos are written to; download_videos() creates it if missing.
output_folder = 'videos_output'
# Text file holding one already-downloaded message ID per line.
downloaded_videos_file = 'downloaded_videos.txt'
# Videos smaller than this are skipped by download_videos().
min_video_size = 100 * 1024 * 1024  # 100MB in bytes
# Initial value of the semaphore that gates download_video().
max_concurrent_downloads = 15
# Upper bound on the retry counter used by download_video().
max_retries = 5
# Backoff bounds used by download_video() between retries.
initial_retry_delay = 5  # in seconds
max_retry_delay = 40  # in seconds

# Shared client used by every coroutine in this script.
# NOTE(review): 'anon' is presumably the Telethon session name — confirm
# against the TelegramClient documentation.
client = TelegramClient('anon', api_id, api_hash)

# Setup logging: INFO level, timestamped records on the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def load_downloaded_videos():
    """Return the IDs recorded in ``downloaded_videos_file`` as a set of strings.

    Each line of the file contributes one entry with surrounding whitespace
    stripped. An empty set is returned when the file does not exist.
    """
    if not os.path.exists(downloaded_videos_file):
        return set()

    with open(downloaded_videos_file, 'r') as record_file:
        return {entry.strip() for entry in record_file}

def save_downloaded_video(video_id):
    """Append *video_id* on its own line to ``downloaded_videos_file``."""
    with open(downloaded_videos_file, 'a') as record_file:
        record = "{}\n".format(video_id)
        record_file.write(record)

# Maps a video's file name to its current progress line. Entries are written
# by progress_handler(), printed by update_progress_display(), and removed by
# download_video() once that video is finished.
download_progress = {}
# Entered by download_video() around its whole body, so at most
# max_concurrent_downloads downloads are in flight at once.
semaphore = asyncio.Semaphore(max_concurrent_downloads)

async def refetch_message(message_id, channel_entity):
    """Fetch a fresh copy of a single message from the given channel.

    Any exception raised by the lookup is logged and swallowed.

    Returns:
        The result of ``client.get_messages`` for that ID, or ``None`` when
        the lookup raised.
    """
    try:
        fresh_message = await client.get_messages(channel_entity, ids=message_id)
    except Exception as exc:
        logger.error(f"Error refetching message {message_id}: {str(exc)}")
        return None
    return fresh_message

async def download_video(message):
    """Download one video message into ``output_folder``, resuming and retrying.

    Data is appended to ``<name>.temp`` so an interrupted transfer resumes from
    the bytes already on disk. Once the file has reached the document's size it
    is renamed to its final name and the message ID is recorded through
    ``save_downloaded_video``.

    Expired file references and timeouts are logged and retried (at most
    ``max_retries`` attempts in total); they never raise. Any other exception
    propagates, but only after this video's progress entry has been removed.

    Args:
        message: Telethon message whose ``media.document`` is the video.
    """
    async with semaphore:
        # Videos sent without a file name report None here; fall back to the
        # message ID plus the file's extension instead of producing "..._None".
        file_name = message.file.name or f"{message.id}{message.file.ext or ''}"
        original_video_name = f"{message.date.strftime('%Y-%m-%d_%H-%M-%S')}_{file_name}"
        temp_video_path = os.path.join(output_folder, original_video_name + ".temp")
        final_video_path = os.path.join(output_folder, original_video_name)
        # Captured up front because `message` is rebound after a refetch.
        message_id = message.id
        chat_id = message.chat_id

        logger.info(f"Downloading {message_id}: {original_video_name}...")

        retry_count = 0
        wait_time = initial_retry_delay

        try:
            while retry_count < max_retries:
                document = message.media.document
                try:
                    # Append mode: tell() is the number of bytes already on
                    # disk, which becomes the resume offset.
                    with open(temp_video_path, 'ab') as file:
                        offset = file.tell()
                        downloaded_bytes = offset
                        async for chunk in client.iter_download(document, offset=offset):
                            file.write(chunk)
                            downloaded_bytes += len(chunk)
                            progress_handler(downloaded_bytes, document.size, original_video_name)

                except errors.FileReferenceExpiredError:
                    # Counted as an attempt so a reference that keeps expiring
                    # cannot loop forever.
                    retry_count += 1
                    logger.info(f"File reference expired for {message_id}: {original_video_name}. Refetching message...")
                    refreshed = await refetch_message(message_id, chat_id)
                    if refreshed is None:
                        logger.error(f"Failed to refetch message for {message_id}: {original_video_name}. Skipping...")
                        break
                    message = refreshed
                    continue

                except errors.TimedOutError:
                    # Handled by the shared retry/backoff path below.
                    pass

                else:
                    if os.path.getsize(temp_video_path) >= document.size:
                        os.rename(temp_video_path, final_video_path)
                        save_downloaded_video(message_id)
                        logger.info(f"Downloaded {message_id}: {original_video_name}")
                        break
                    # The stream ended early: treat it like a timeout and
                    # resume from the bytes written so far.

                retry_count += 1
                if retry_count >= max_retries:
                    # No point sleeping after the last attempt; the while/else
                    # below reports the failure.
                    continue

                logger.error(f"Timeout on {message_id}: {original_video_name}. Retrying in {wait_time} seconds...")
                # asyncio.sleep yields to the event loop; time.sleep would
                # freeze every other concurrent download for the whole delay.
                await asyncio.sleep(wait_time)
                # Doubles up to max_retry_delay; once the cap has been used
                # the delay drops back to 10 seconds.
                wait_time = min(wait_time * 2, max_retry_delay) if wait_time < max_retry_delay else 10
            else:
                # Reached only when the attempts ran out (not on `break`).
                logger.error(f"Failed to download {message_id}: {original_video_name} after {max_retries} retries.")
        finally:
            # pop() tolerates videos that never reported progress, and the
            # finally keeps the display clean when an exception propagates.
            download_progress.pop(original_video_name, None)
            update_progress_display()

async def download_videos():
    """Download every not-yet-recorded video of at least ``min_video_size`` bytes.

    Creates ``output_folder`` if needed, loads the set of already-downloaded
    message IDs, lists the channel's video messages, and runs
    ``download_video`` for each remaining one concurrently. An unexpected
    exception from one download is logged and does not abort the others.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    downloaded_videos = load_downloaded_videos()

    async with client:
        target_channel = await client.get_entity(int(channel_username))

        # Report the configured threshold instead of a hard-coded "100MB".
        min_size_mb = min_video_size / (1024 * 1024)
        logger.info(f"Downloading videos larger than {min_size_mb:g}MB from @{channel_username}...")

        # IDs are compared as strings because that is how they are stored in
        # the downloaded-videos file.
        messages = [
            message async for message in client.iter_messages(
                target_channel,
                filter=InputMessagesFilterVideo()
            ) if str(message.id) not in downloaded_videos and message.media.document.size >= min_video_size
        ]

        # return_exceptions=True keeps one failing download from propagating
        # out of gather() and abandoning the rest of the batch.
        results = await asyncio.gather(
            *(download_video(message) for message in messages),
            return_exceptions=True,
        )
        for message, result in zip(messages, results):
            if isinstance(result, Exception):
                logger.error(f"Unexpected error while downloading {message.id}: {result!r}")

        logger.info(f"Finished downloading videos from @{channel_username}")

def progress_handler(downloaded_bytes, total_bytes, file_name):
    """Record the progress line for *file_name* and redraw the display.

    Args:
        downloaded_bytes: Number of bytes received so far.
        total_bytes: Expected size of the file; a zero or missing total is
            reported as 0% instead of raising ``ZeroDivisionError``.
        file_name: Key under which the line is stored in ``download_progress``.
    """
    progress = (downloaded_bytes / total_bytes) * 100 if total_bytes else 0.0
    download_progress[file_name] = f"Downloading {file_name}: {progress:.2f}%"
    update_progress_display()

def update_progress_display():
    """Clear the terminal and print one line per entry in ``download_progress``.

    The screen is cleared by running the platform's clear command through
    ``os.system`` (``cls`` when ``os.name`` is ``'nt'``, ``clear`` otherwise).
    """
    clear_command = 'clear'
    if os.name == 'nt':
        clear_command = 'cls'
    os.system(clear_command)

    for status_line in download_progress.values():
        sys.stdout.write(status_line + '\n')
    sys.stdout.flush()

if __name__ == "__main__":
    # Script entry point: run download_videos() to completion and disconnect
    # the client if the user interrupts with Ctrl+C.
    loop = asyncio.get_event_loop()

    try:
        loop.run_until_complete(download_videos())
    except KeyboardInterrupt:
        logger.warning("Interrupted by user. Exiting...")
        # run_until_complete() has already returned at this point, so the loop
        # is not running. Calling loop.stop() here would only make the *next*
        # run exit after a single iteration and could cut the disconnect
        # short, so it is deliberately not called.
        pending = client.disconnect()
        # NOTE(review): Telethon documents disconnect() as running the loop
        # itself when the loop is idle. If a coroutine comes back instead,
        # drive it to completion here rather than leaving it un-awaited.
        if asyncio.iscoroutine(pending):
            loop.run_until_complete(pending)

The script does quite a few things for me. Some of them are:

  1. Asynchronous Downloads: The script uses Python's asyncio library to perform asynchronous downloads. This means it can handle multiple download tasks simultaneously without blocking the main execution thread, leading to more efficient use of resources and faster overall download times.

  2. Semaphore for Concurrent Download Limit: A semaphore is utilized to limit the number of concurrent downloads. This prevents the script from overloading the network or system resources by restricting the number of files being downloaded at the same time.

  3. Telethon for Telegram API Interaction: The script leverages the Telethon library, a powerful Python toolkit for interacting with Telegram's API. It allows for seamless access and control over Telegram channels, messages, and media files.

  4. Resumable Downloads: One of the notable features of the script is its ability to resume incomplete downloads. If a download is interrupted due to a network issue or a program restart, the script can resume from where it left off, avoiding the need to re-download the entire file.

  5. Exponential Backoff on Retries: The script implements an exponential backoff strategy for retries. If a download fails due to a timeout, it waits for a certain period before retrying, and this wait time doubles with each failed attempt, up to a configurable maximum delay. This strategy helps in efficiently managing network issues.

  6. Error Handling: The script includes robust error handling mechanisms, particularly for network-related issues like timeouts. It logs errors and handles them gracefully, ensuring the script's stability even in less-than-ideal network conditions.

  7. Progress Tracking and Display: Progress of each download is tracked and displayed in the console. This feature provides real-time feedback on the status of each download, including which files are currently being downloaded and their progress percentage.

  8. Filtering and Downloading Large Files: The script is specifically designed to filter and download large video files from Telegram channels. It allows setting a minimum file size threshold to target larger media files.

  9. Organized File Management: Downloaded files are systematically renamed and organized. The script initially saves files with a .temp extension during the download process and renames them to their original names once the download is complete.

  10. Customizable Parameters: Key parameters such as the number of concurrent downloads, minimum file size for downloads, and retry behavior are easily customizable, making the script adaptable to different user requirements and network conditions.