Spaces:
Running
Running
| import yt_dlp | |
| import os | |
| import uuid | |
| import json | |
| from pathlib import Path | |
| from typing import Dict, Any | |
| from datetime import datetime | |
def download_youtube_video(url: str,
                           base_dir: str = None,
                           video_quality: int = 720,
                           youtube_cookies: str = "./cookies.txt") -> Dict[str, str]:
    """
    Download a YouTube video and its audio track into a fresh GUID-named folder.

    The video stream is saved as ``video.mp4``, the audio is converted to
    ``audio.mp3`` (128 kbps) by yt-dlp's ``FFmpegExtractAudio`` post-processor,
    and a ``metadata.json`` with the download date, time and timezone is
    written next to them.

    Args:
        url (str): YouTube video URL.
        base_dir (str): Base download directory. When None, a 'downloads'
            folder next to this script file is used.
        video_quality (int): Preferred video height. Acceptable values:
            144, 240, 360, 480, 720, 1080, 1440, 2160. Default value: 720.
            The format selector asks for an MP4 stream of exactly this
            height, then falls back to the worst-ranked taller MP4 stream,
            then to the best MP4 stream that is not taller.
        youtube_cookies (str): Path of the cookie file passed to yt-dlp as
            its 'cookiefile' option. Default value: './cookies.txt'.

    Returns:
        dict: Dictionary with absolute file paths:
            {
                'data_path': str,      # Path to the download directory
                'video_path': str,     # Full path to video.mp4
                'audio_path': str,     # Full path to audio.mp3
                'metadata_path': str   # Full path to metadata.json
            }

    Raises:
        ValueError: If video_quality is not one of the allowed values.
        RuntimeError: If any later step fails; the original exception is
            chained as ``__cause__``.
    """
    allowed_qualities = (144, 240, 360, 480, 720, 1080, 1440, 2160)
    if video_quality not in allowed_qualities:
        raise ValueError(
            f"Invalid video quality: '{video_quality}'. "
            f"Allowed qualities are: {', '.join(map(str, allowed_qualities))}"
        )

    try:
        # Default to a 'downloads' folder located next to this script file.
        if base_dir is None:
            base_dir = Path(__file__).parent / "downloads"

        # Every call gets its own GUID folder so downloads never collide.
        guid = str(uuid.uuid4())
        download_dir = Path(base_dir) / guid
        download_dir.mkdir(parents=True, exist_ok=True)

        # Final locations of the produced files.
        video_path = download_dir / "video.mp4"
        audio_path = download_dir / "audio.mp3"
        metadata_path = download_dir / "metadata.json"

        # yt-dlp interprets '%' in an output template as the start of a field,
        # so literal percent signs in the directory name must be doubled.
        outtmpl_dir = str(download_dir).replace('%', '%%')

        # Record the download start time and the local timezone.
        download_datetime = datetime.now()
        current_timezone = download_datetime.astimezone().tzinfo

        # 1. Download video (MP4)
        video_opts = {
            'format': (
                f"bestvideo[height={video_quality}][ext=mp4]"
                f"/worstvideo[height>{video_quality}][ext=mp4]"
                f"/bestvideo[height<={video_quality}][ext=mp4]"
            ),
            'outtmpl': os.path.join(outtmpl_dir, "video.mp4"),
            'quiet': True,
            'no_warnings': True,
            'restrictfilenames': True,
            'cookiefile': youtube_cookies,
        }
        with yt_dlp.YoutubeDL(video_opts) as ydl:
            video_info = ydl.extract_info(url, download=True)

        # 2. Download audio (MP3)
        # The template uses %(ext)s so the raw stream keeps its own extension
        # and the post-processor writes the converted file as audio.mp3.
        audio_opts = {
            'format': 'bestaudio/best',
            'outtmpl': os.path.join(outtmpl_dir, "audio.%(ext)s"),
            'quiet': True,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '128',
            }],
            'cookiefile': youtube_cookies,
        }
        with yt_dlp.YoutubeDL(audio_opts) as ydl:
            audio_info = ydl.extract_info(url, download=True)

        # 3. Save metadata to JSON
        metadata = {
            'original_url': url,
            'guid': guid,
            'download_info': {
                'date': download_datetime.strftime('%Y-%m-%d'),
                'time': download_datetime.strftime('%H:%M:%S'),
                'timezone': str(current_timezone),
                'datetime_iso': download_datetime.isoformat(),
            },
            'video': {
                'path': str(video_path),
                'title': video_info.get('title'),
                'duration': video_info.get('duration'),
                'resolution': video_info.get('resolution'),
                'upload_date': video_info.get('upload_date'),
            },
            'audio': {
                'path': str(audio_path),
                # 'abr' is whatever yt-dlp reports for the selected source format.
                'bitrate': audio_info.get('abr'),
                'codec': 'mp3',
            },
        }
        with open(metadata_path, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2, ensure_ascii=False)

        # All returned paths are absolute.
        return {
            'data_path': str(download_dir.absolute()),
            'video_path': str(video_path.absolute()),
            'audio_path': str(audio_path.absolute()),
            'metadata_path': str(metadata_path.absolute()),
        }
    except Exception as e:
        # Keep the public RuntimeError contract but preserve the real cause.
        raise RuntimeError(f"Media download error: {str(e)}") from e
if __name__ == "__main__":
    # Manual run: download one sample video into ./temp and print the
    # dictionary of resulting paths.
    sample_url = "https://www.youtube.com/watch?v=FK3dav4bA4s"
    result_paths = download_youtube_video(sample_url, "./temp")
    print(result_paths)