"""Gradio app that validates "throw garbage" mission videos with the GIT model.

Each submitted video is slowed down with FFmpeg, captioned frame by frame with
``microsoft/git-base-coco``, and then uploaded to a Hugging Face dataset in a
folder that reflects the validation outcome.
"""

import datetime
import os
import subprocess

import gradio as gr
import cv2
import torch
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
from huggingface_hub import HfApi

# Load the GIT captioning model.
# The model is downloaded into a directory made writable by the Dockerfile.
print("Memuat model GIT...")
processor = AutoProcessor.from_pretrained("microsoft/git-base-coco")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-coco")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print("Model GIT berhasil dimuat!")

# Hugging Face Hub configuration.
DATASET_REPO_ID = "monikahung/videos_throw_garbage"
HF_TOKEN = os.getenv("HF_TOKEN_VIDEOS")
api = HfApi(token=HF_TOKEN)

# Keywords that must appear in a frame caption, per mission type.
_MISSION_KEYWORDS = {
    "paper": ["person", "paper", "yellow trash can"],
    "leaf": ["person", "leaves", "green trash can"],
}

# Caption every N-th frame (1 means every frame).
_FRAME_INTERVAL = 1
# A frame counts as valid when its caption contains at least this many keywords.
_MIN_KEYWORDS_PER_FRAME = 2
# The video is valid once this many captioned frames in a row are valid.
_REQUIRED_CONSECUTIVE_FRAMES = 3


def upload_video_to_dataset(video_path, folder_name):
    """Upload a video file to the Hugging Face dataset under ``folder_name``.

    This is best-effort: when no token is configured, or when the upload
    raises, a message is printed and the function returns ``None`` without
    propagating the error.

    Args:
        video_path: Local path of the video file to upload.
        folder_name: Folder inside the dataset repository to upload into.
    """
    if not HF_TOKEN:
        print("Peringatan: Token Hugging Face tidak ditemukan. Tidak dapat mengunggah ke dataset.")
        return

    try:
        # Prefix the file name with a timestamp to make it unique.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        file_name = f"{timestamp}_{os.path.basename(video_path)}"
        path_in_repo = f"{folder_name}/{file_name}"

        # Upload the file into the requested folder of the dataset.
        api.upload_file(
            path_or_fileobj=video_path,
            path_in_repo=path_in_repo,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
        )
        print(f"File {file_name} berhasil diunggah ke folder '{folder_name}'.")
    except Exception as e:
        # Deliberately broad: a failed upload must not fail the validation.
        print(f"Gagal mengunggah file ke dataset: {e}")


def process_and_slow_video(video_path, slow_factor=2):
    """Slow a video down with FFmpeg and return the path of the new file.

    Args:
        video_path: Path of the input video.
        slow_factor: Multiplier applied to the video presentation timestamps
            (``setpts=<slow_factor>*PTS``).

    Returns:
        The path ``<input without extension>_slowed.mp4`` on success, or
        ``None`` when FFmpeg fails or is not installed.
    """
    # Output path for the slowed-down video.
    slowed_video_path = f"{os.path.splitext(video_path)[0]}_slowed.mp4"

    # `-filter:v setpts=...` retimes the video stream only; no audio filter is
    # passed, so audio is merely re-encoded to AAC (`-c:a aac`), not slowed.
    # `-y` overwrites an existing output file without prompting.
    command = [
        'ffmpeg',
        '-i', video_path,
        '-filter:v', f'setpts={slow_factor}*PTS',
        '-c:a', 'aac',
        '-y',
        slowed_video_path
    ]

    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
        print(f"Video berhasil diperlambat. Disimpan di: {slowed_video_path}")
        return slowed_video_path
    except subprocess.CalledProcessError as e:
        print(f"FFmpeg gagal. Error: {e.stderr}")
        return None
    except FileNotFoundError:
        print("FFmpeg tidak ditemukan. Pastikan sudah terinstal dan ada di PATH.")
        return None


def _remove_temp_file(path):
    """Delete ``path``, logging instead of raising if the deletion fails."""
    try:
        os.remove(path)
    except OSError as e:
        # Cleanup is best-effort so it never masks the validation outcome.
        print(f"Gagal menghapus file sementara {path}: {e}")


def _caption_frame(frame_bgr):
    """Return the lower-cased, stripped GIT caption for one BGR frame."""
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(frame_rgb)

    pixel_values = processor(images=pil_image, return_tensors="pt").pixel_values.to(device)
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip().lower()


def _has_valid_frame_streak(cap, required_keywords):
    """Return True once enough consecutive captioned frames match the keywords."""
    streak = 0
    frame_count = 0

    while cap.isOpened():
        ret, frame_bgr = cap.read()
        if not ret:
            break

        if frame_count % _FRAME_INTERVAL == 0:
            caption = _caption_frame(frame_bgr)
            print(f"Processing frame {frame_count}: '{caption}'")

            # Keywords are matched as plain substrings of the caption.
            found_keywords = [keyword for keyword in required_keywords if keyword in caption]
            if len(found_keywords) >= _MIN_KEYWORDS_PER_FRAME:
                streak += 1
            else:
                # Any non-matching frame resets the run of valid frames.
                streak = 0

            if streak >= _REQUIRED_CONSECUTIVE_FRAMES:
                return True

        frame_count += 1

    return False


def _scan_slowed_video(slowed_video_path, required_keywords):
    """Scan the slowed video; return True/False, or None if it cannot be opened."""
    cap = cv2.VideoCapture(slowed_video_path)
    try:
        if not cap.isOpened():
            return None
        return _has_valid_frame_streak(cap, required_keywords)
    finally:
        # Release the capture even when captioning raises.
        cap.release()


def validate_video_with_git(video_path, mission_type):
    """Validate a video with the GIT model and upload it to the dataset.

    The video is slowed down, its frames are captioned, and it is considered
    valid when enough consecutive frames mention at least two of the keywords
    required for ``mission_type``. The original video is uploaded to the
    ``mission_type`` folder when valid and to ``"neither"`` otherwise.

    Args:
        video_path: Path of the submitted video (falsy when none was given).
        mission_type: ``"paper"`` or ``"leaf"``.

    Returns:
        A user-facing status message describing the outcome.
    """
    # Nothing was submitted: there is no file to slow down or upload.
    if not video_path:
        return "Gagal. Gagal memproses video."

    # Slow the video down first.
    slowed_video_path = process_and_slow_video(video_path)
    if not slowed_video_path:
        upload_video_to_dataset(video_path, "neither")
        return "Gagal. Gagal memproses video."

    # Pick the keywords for the mission type.
    required_keywords = _MISSION_KEYWORDS.get(mission_type)
    if required_keywords is None:
        # Invalid mission type: drop the temporary file and store as "neither".
        _remove_temp_file(slowed_video_path)
        upload_video_to_dataset(video_path, "neither")
        return "Gagal. Tipe misi tidak valid."

    try:
        scan_result = _scan_slowed_video(slowed_video_path, required_keywords)
    finally:
        # Always delete the slowed-down video, whatever the scan outcome.
        _remove_temp_file(slowed_video_path)

    if scan_result is None:
        upload_video_to_dataset(video_path, "neither")
        return "Gagal. Gagal membuka file video yang diperlambat."

    # Choose the dataset folder from the validation result and mission type.
    if scan_result:
        upload_video_to_dataset(video_path, mission_type)
        return "Video dianggap valid. Misi berhasil!"

    upload_video_to_dataset(video_path, "neither")
    return "Video tidak memenuhi kriteria. Misi gagal."


# Build the Gradio interface.
interface = gr.Interface(
    fn=validate_video_with_git,
    inputs=[
        gr.Video(label="Video Sampah"),
        gr.Radio(choices=["paper", "leaf"], label="Jenis Misi")
    ],
    outputs=gr.Textbox(label="Hasil Validasi"),
    title="Validasi Misi Sampah dengan AI",
    description="Unggah video dan pilih tipe misi untuk memvalidasi aksi membuang sampah."
)

if __name__ == "__main__":
    interface.launch()