Spaces:

artyomboyko
/

Aura_AI_Scan

Running

Artyom Boyko

Fix docker problem.

076e4c4 7 months ago

3.95 kB

	from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, logging
	from qwen_vl_utils import process_vision_info
	import torch
	from PIL import Image
	import gc
	import warnings


	### Utility functions ###
	def clear_gpu_cache():
	    """Run garbage collection, then release cached GPU memory.

	    The collection runs first so that memory freed by it is already
	    unoccupied when ``torch.cuda.empty_cache()`` is called; in the
	    opposite order that memory would stay in the allocator cache until
	    the next call.

	    Returns:
	        None
	    """
	    # Drop unreachable Python objects (including tensors kept alive only
	    # by reference cycles) before asking the allocator to release memory.
	    gc.collect()
	    torch.cuda.empty_cache()


	def get_device_and_dtype():
	    """Pick the inference device and the matching tensor dtype.

	    Returns:
	        tuple: ``('cuda', torch.float16)`` when CUDA is available,
	        otherwise ``('cpu', torch.float32)``.
	    """
	    if torch.cuda.is_available():
	        return 'cuda', torch.float16
	    return 'cpu', torch.float32


	### Model-related functions ###
	def initialize_model(
	    device: str,
	    torch_dtype: torch.dtype,
	    model_id: str = "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
	):
	    """Load the vision-language model via ``from_pretrained``.

	    Args:
	        device: Value forwarded as ``device_map``.
	        torch_dtype: Value forwarded as ``torch_dtype``.
	        model_id: Checkpoint identifier passed to ``from_pretrained``.
	            Defaults to the checkpoint that was previously hard-coded,
	            so existing two-argument calls behave as before.

	    Returns:
	        The object returned by
	        ``Qwen2_5_VLForConditionalGeneration.from_pretrained``.
	    """
	    # Every Python warning emitted while loading is silenced; the filter
	    # is restored when the context manager exits.
	    with warnings.catch_warnings():
	        warnings.simplefilter("ignore")
	        return Qwen2_5_VLForConditionalGeneration.from_pretrained(
	            model_id,
	            device_map=device,
	            torch_dtype=torch_dtype,
	        )


	def initialize_processor(model_id: str = "Qwen/Qwen2.5-VL-7B-Instruct-AWQ"):
	    """Load the processor via ``AutoProcessor.from_pretrained``.

	    Args:
	        model_id: Checkpoint identifier passed to ``from_pretrained``.
	            Defaults to the checkpoint that was previously hard-coded,
	            so existing zero-argument calls behave as before.

	    Returns:
	        The object returned by ``AutoProcessor.from_pretrained``.
	    """
	    return AutoProcessor.from_pretrained(model_id)


	def prepare_model_inputs(processor, messages, device: str):
	    """Turn chat ``messages`` into processor output placed on ``device``.

	    Args:
	        processor: Object providing ``apply_chat_template`` and callable
	            with ``text``/``images``/``videos`` keyword arguments.
	        messages: Chat messages handed to both the chat template and
	            ``process_vision_info``.
	        device: Target passed to ``.to()`` on the processor output.

	    Returns:
	        The processor output after ``.to(device)``.
	    """
	    # Render the conversation as untokenized text ending with the
	    # generation prompt.
	    chat_text = processor.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )
	    images, videos = process_vision_info(messages)

	    batch = processor(
	        text=[chat_text],
	        images=images,
	        videos=videos,
	        padding=True,
	        return_tensors="pt",
	    )
	    return batch.to(device)


	def generate_description(model, inputs):
	    """Call ``model.generate`` on ``inputs`` with fixed sampling settings.

	    Args:
	        model: Object exposing a ``generate`` method.
	        inputs: Mapping unpacked into ``generate`` as keyword arguments.

	    Returns:
	        Whatever ``model.generate`` returns.
	    """
	    sampling_options = {
	        "max_new_tokens": 512,
	        "do_sample": True,
	        "temperature": 0.7,
	        "top_p": 0.9,
	    }
	    return model.generate(**inputs, **sampling_options)


	def process_model_output(processor, inputs, generated_ids):
	    """Decode the newly generated part of the first sequence to text.

	    Args:
	        processor: Object exposing ``batch_decode``.
	        inputs: Object whose ``input_ids`` holds the prompt ids, one
	            entry per sequence.
	        generated_ids: Output sequences paired positionally with
	            ``inputs.input_ids``.

	    Returns:
	        The first element of the ``batch_decode`` result.
	    """
	    completions = []
	    for prompt_ids, full_ids in zip(inputs.input_ids, generated_ids):
	        # Each output is sliced past the prompt length so only the
	        # continuation is decoded.
	        prompt_length = len(prompt_ids)
	        completions.append(full_ids[prompt_length:])

	    decoded = processor.batch_decode(
	        completions,
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=False,
	    )
	    return decoded[0]


	def create_prompt_message(image_path, timestamp, custom_prompt=None):
	    """
	    Create the single-turn user message for one video frame.

	    Args:
	        image_path (str): Path to the image file; embedded as
	            ``file://{image_path}``.
	        timestamp (str): Video timestamp in HH:MM:SS.SSS format.
	        custom_prompt (str, optional): Prompt template. A ``{timestamp}``
	            placeholder is replaced with ``timestamp``. If None or empty,
	            the default prompt is used.

	    Returns:
	        list: One-element list holding a ``user`` message with an image
	        entry and a text entry.
	    """
	    if custom_prompt:
	        try:
	            prompt_text = custom_prompt.format(timestamp=timestamp)
	        except (KeyError, IndexError, ValueError):
	            # The template has braces that are not a valid ``{timestamp}``
	            # field (e.g. a JSON example). Substitute only the placeholder
	            # and keep the rest of the text verbatim instead of crashing.
	            prompt_text = custom_prompt.replace("{timestamp}", str(timestamp))
	    else:
	        prompt_text = (
	            f"Video timestamp: {timestamp}. "
	            "Describe in detail what is happening in this frame."
	        )

	    return [{
	        "role": "user",
	        "content": [
	            {"type": "image", "image": f"file://{image_path}"},
	            {"type": "text", "text": prompt_text},
	        ],
	    }]


	if __name__ == "__main__":
	    # Manual smoke run: describe one extracted frame and print the result.
	    frame_path = "/workspaces/Video_Analyser/app_srv/temp/a28af289-377d-468d-b0eb-ed0f7dcd2ab3/frames/frame_00_01_50_110.jpg"
	    # NOTE(review): the frame file name looks like it encodes 00:01:50.110,
	    # which differs from this timestamp - confirm which one is intended.
	    frame_timestamp = "00:01:16.076"
	    prompt_template = "Timestamp {timestamp}. Analyze this frame focusing on objects in the foreground."

	    run_device, run_dtype = get_device_and_dtype()

	    vl_model = initialize_model(run_device, run_dtype)
	    vl_processor = initialize_processor()

	    chat_messages = create_prompt_message(frame_path, frame_timestamp, prompt_template)
	    model_inputs = prepare_model_inputs(vl_processor, chat_messages, run_device)

	    output_ids = generate_description(vl_model, model_inputs)
	    print(process_model_output(vl_processor, model_inputs, output_ids))