Spaces:

MuhammedEmirEren
/

GLOWii

Sleeping

App Files Files Community

GLOWii / process_image.py

MuhammedEmirEren

Reverting all the finegrain things

d295d37 verified 4 months ago

raw

history blame contribute delete

13 kB

	from transformers import OwlViTProcessor, OwlViTForObjectDetection
	from PIL import Image
	import torch
	from rembg import remove
	import os
	import cv2
	import numpy as np
	from PIL import Image, ImageEnhance, ImageFilter
	from gradio_client import Client, handle_file
	import json
	import google.generativeai as genai
	import base64
	import image_enhancement_option3_helper
	from dotenv import load_dotenv

	# Load variables from a local .env file (if one exists) into the process
	# environment at import time; SECRET_API_KEY is read from the environment
	# later in this file via os.getenv().
	load_dotenv()

	class process_image:
	    """Product-photo pipeline: load, detect/crop, cut out, enhance, describe.

	    Each stage stores its result on the instance and the next stage reads it:
	    ``process`` sets ``raw_image``; ``detect_object`` sets ``cropped_image``;
	    ``remove_background`` sets ``no_background_image``; the
	    ``enhance_image_option*`` methods set ``enhanced_image_1..3``;
	    ``choose_image`` sets ``chosen_image``; ``generate_description`` sets
	    ``description``.
	    """

	    # OWL-ViT checkpoint used for text-conditioned object detection.
	    DETECTOR_CHECKPOINT = "google/owlvit-base-patch32"

	    # Score threshold handed to the detector's post-processing step.
	    DETECTION_THRESHOLD = 0.2

	    # Text queries given to the detector, grouped by product category.
	    DETECTION_QUERIES = (
	        # Clothing
	        "clothing",
	        "topwear",
	        "bottomwear",
	        "outerwear",
	        "apparel",
	        "sportswear",
	        "uniform",
	        "underwear",
	        "dress",
	        "outfit",
	        # Footwear
	        "footwear",
	        "shoes",
	        "boots",
	        "sneakers",
	        # Accessories
	        "accessory",
	        "bag",
	        "backpack",
	        "handbag",
	        "wallet",
	        "belt",
	        "hat",
	        "cap",
	        "scarf",
	        "glasses",
	        "watch",
	        "jewelry",
	        # Electronics
	        "electronics",
	        "device",
	        "gadget",
	        "smartphone",
	        "laptop",
	        "tablet",
	        "headphones",
	        "smartwatch",
	        # Cosmetics / personal care
	        "cosmetics",
	        "beauty product",
	        "skincare",
	        "makeup",
	        "perfume",
	        "hair product",
	        # Baby and child
	        "baby product",
	        "baby clothes",
	        "toy",
	        "stroller",
	        "pacifier",
	        # Home and living
	        "home item",
	        "furniture",
	        "appliance",
	        "decor",
	        "kitchenware",
	        "bedding",
	        "cleaning tool",
	        # Sports and outdoor
	        "sports gear",
	        "fitness equipment",
	        "gym accessory",
	        "camping gear",
	        "bicycle equipment",
	    )

	    # Substrings of labels for items that usually come in pairs or sets.
	    PAIRED_ITEM_KEYWORDS = (
	        'shoes', 'boots', 'sneakers', 'footwear', 'glasses', 'earrings',
	        'gloves', 'socks', 'jewelry', 'watch', 'bracelet',
	    )

	    # Substrings of labels that denote clothing.
	    CLOTHING_KEYWORDS = (
	        'clothing', 'topwear', 'bottomwear', 'dress', 'outfit', 'apparel',
	    )

	    # Longest description kept before it is cut and suffixed with "...".
	    MAX_DESCRIPTION_CHARS = 15000

	    # Lazily created (processor, model) pair shared by every instance so the
	    # checkpoint is loaded once per process instead of once per call.
	    _detector = None

	    def __init__(self):
	        self.image_path = None
	        self.raw_image = None
	        self.detected_objects = []
	        self.cropped_image = None
	        self.no_background_image = None
	        self.enhanced_image_1 = None
	        self.enhanced_image_2 = None
	        self.enhanced_image_3 = None
	        self.chosen_image = None
	        self.description = ""

	    @classmethod
	    def _load_detector(cls):
	        """Return the shared (processor, model) pair, loading it on first use."""
	        if process_image._detector is None:
	            processor = OwlViTProcessor.from_pretrained(cls.DETECTOR_CHECKPOINT)
	            model = OwlViTForObjectDetection.from_pretrained(cls.DETECTOR_CHECKPOINT)
	            process_image._detector = (processor, model)
	        return process_image._detector

	    @staticmethod
	    def _select_crop_box(boxes, labels, image_size):
	        """Pick the crop rectangle for a set of detections.

	        Args:
	            boxes: list of ``[xmin, ymin, xmax, ymax]`` detections.
	            labels: label text for each box, in the same order.
	            image_size: ``(width, height)`` of the image being cropped.

	        Returns:
	            An integer ``(left, upper, right, lower)`` tuple lying inside the
	            image, or ``None`` when the whole image should be kept (no
	            detections, too many unrelated detections, or an empty box).
	        """
	        import math

	        if not boxes:
	            return None

	        if len(boxes) > 1:
	            lowered = [label.lower() for label in labels]
	            has_paired_items = any(
	                keyword in label
	                for label in lowered
	                for keyword in process_image.PAIRED_ITEM_KEYWORDS
	            )
	            has_clothing_items = any(
	                keyword in label
	                for label in lowered
	                for keyword in process_image.CLOTHING_KEYWORDS
	            )
	            # Many unrelated detections: a merged box would not isolate a
	            # single product, so keep the full frame.
	            if not (has_paired_items or has_clothing_items or len(boxes) <= 3):
	                return None

	        width, height = image_size
	        # Round outwards so the crop never clips a detection, then clamp to
	        # the image so the crop cannot reach outside it.
	        left = max(0, math.floor(min(box[0] for box in boxes)))
	        upper = max(0, math.floor(min(box[1] for box in boxes)))
	        right = min(width, math.ceil(max(box[2] for box in boxes)))
	        lower = min(height, math.ceil(max(box[3] for box in boxes)))

	        if right <= left or lower <= upper:
	            return None
	        return (left, upper, right, lower)

	    def detect_object(self):
	        """Detect products in ``raw_image`` and store the crop in ``cropped_image``.

	        Also stores the detected label ids in ``detected_objects``. When
	        nothing usable is detected, ``cropped_image`` is the full image.

	        Raises:
	            ValueError: if no image has been loaded yet.
	        """
	        if self.raw_image is None:
	            raise ValueError("No image loaded. Call process() before detect_object().")

	        processor, model = self._load_detector()
	        texts = [list(self.DETECTION_QUERIES)]

	        inputs = processor(text=texts, images=self.raw_image, return_tensors="pt")

	        with torch.no_grad():
	            outputs = model(**inputs)

	        # PIL's size is (width, height); reversed here to (height, width).
	        target_sizes = torch.tensor([self.raw_image.size[::-1]])
	        results = processor.post_process_grounded_object_detection(
	            outputs=outputs,
	            target_sizes=target_sizes,
	            threshold=self.DETECTION_THRESHOLD,
	        )[0]
	        self.detected_objects = results["labels"].tolist()

	        # The threshold above already removed low-scoring detections, so no
	        # second score filter is applied here.
	        boxes = [box.tolist() for box in results["boxes"]]
	        labels = [texts[0][int(label_id)] for label_id in results["labels"]]

	        if len(boxes) == 1:
	            print(f"Single object detected: {labels[0]}")

	        crop_box = self._select_crop_box(boxes, labels, self.raw_image.size)
	        if crop_box is None:
	            self.cropped_image = self.raw_image
	        else:
	            self.cropped_image = self.raw_image.crop(crop_box)

	    def remove_background(self):
	        """Run rembg on ``cropped_image`` and store it in ``no_background_image``.

	        Falls back to ``raw_image`` when no crop is available.

	        Raises:
	            ValueError: if neither a cropped nor a raw image is available.
	        """
	        if self.cropped_image is None:
	            print("No cropped image available. Using entire image.")
	            self.cropped_image = self.raw_image

	        if self.cropped_image is None:
	            raise ValueError("No image loaded. Call process() before remove_background().")

	        self.no_background_image = remove(self.cropped_image)

	    def enhance_image_option1(self):
	        """Enhance locally with Pillow/OpenCV and upscale by 1.5x.

	        Returns:
	            The enhanced image, also stored in ``enhanced_image_1``.

	        Raises:
	            ValueError: if ``no_background_image`` has not been produced yet.
	        """
	        if self.no_background_image is None:
	            raise ValueError(
	                "No background-removed image available. Call remove_background() first."
	            )

	        sharpened = self.no_background_image.filter(ImageFilter.UnsharpMask(
	            radius=1,
	            percent=120,
	            threshold=1
	        ))

	        contrast_enhanced = ImageEnhance.Contrast(sharpened).enhance(1.1)  # 10% more contrast
	        brightness_enhanced = ImageEnhance.Brightness(contrast_enhanced).enhance(1.02)  # 2% brighter
	        color_enhanced = ImageEnhance.Color(brightness_enhanced).enhance(1.05)  # 5% more vibrant

	        img_array = np.array(color_enhanced)

	        # NOTE(review): if the input carries an alpha channel, the RGB2BGR
	        # conversion presumably drops it, making the result opaque - confirm
	        # this is intended before relying on transparency downstream.
	        img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
	        denoised = cv2.bilateralFilter(img_bgr, 3, 10, 10)
	        img_rgb = cv2.cvtColor(denoised, cv2.COLOR_BGR2RGB)

	        enhanced = Image.fromarray(img_rgb)
	        scale = 1.5
	        new_size = (int(enhanced.size[0] * scale), int(enhanced.size[1] * scale))

	        self.enhanced_image_1 = enhanced.resize(new_size, Image.Resampling.LANCZOS)
	        return self.enhanced_image_1

	    def enhance_image_option2(self):
	        """Enhance through the hosted finegrain image enhancer.

	        Returns:
	            The enhanced image, also stored in ``enhanced_image_2``.

	        Raises:
	            ValueError: if ``no_background_image`` has not been produced yet.
	        """
	        if self.no_background_image is None:
	            raise ValueError(
	                "No background-removed image available. Call remove_background() first."
	            )

	        import tempfile

	        client = Client("finegrain/finegrain-image-enhancer")

	        # A private temporary directory keeps concurrent calls from
	        # overwriting each other's upload and removes the file afterwards.
	        with tempfile.TemporaryDirectory() as work_dir:
	            temp_image_path = os.path.join(work_dir, "temp_image.png")
	            self.no_background_image.save(temp_image_path)

	            result = client.predict(
	                input_image=handle_file(temp_image_path),
	                prompt="",
	                negative_prompt="",
	                seed=0,
	                upscale_factor=2.6,
	                controlnet_scale=0.5,
	                controlnet_decay=0.6,
	                condition_scale=5,
	                tile_width=200,
	                tile_height=200,
	                denoise_strength=0,
	                num_inference_steps=23,
	                solver="DPMSolver",
	                api_name="/process"
	            )

	        # Get the image from result[1] - local file path, not a URL
	        enhanced = Image.open(result[1])
	        # Read the pixels now so the image does not depend on the result file
	        # staying on disk.
	        enhanced.load()

	        self.enhanced_image_2 = enhanced
	        return self.enhanced_image_2

	    def enhance_image_option3(self):
	        """Enhance through ``image_enhancement_option3_helper``.

	        Returns:
	            The enhanced image, also stored in ``enhanced_image_3`` (returned
	            for consistency with the other two options).
	        """
	        enhancer = image_enhancement_option3_helper.image_enhancement_option3_helper(model=None)
	        self.enhanced_image_3 = enhancer.ai_enhanced_image_processing(self.no_background_image)
	        return self.enhanced_image_3

	    @staticmethod
	    def _strip_code_fence(text):
	        """Remove a surrounding markdown code fence (``` or ```json) from ``text``."""
	        text = text.strip()
	        if text.startswith("```"):
	            text = text[3:]
	            # Optional language tag right after the opening fence, any case.
	            if text[:4].lower() == "json":
	                text = text[4:]
	        if text.endswith("```"):
	            text = text[:-3]
	        return text.strip()

	    def generate_description_from_image(self, image_b64: str,
	                                        tone: str = "professional",
	                                        lang: str = "en") -> str:
	        """Ask Gemini for a JSON product listing describing the image.

	        Args:
	            image_b64: base64-encoded JPEG data.
	            tone: writing tone requested in the prompt.
	            lang: language requested in the prompt.

	        Returns:
	            The JSON text when the reply parses as JSON; otherwise a string
	            starting with "Invalid JSON response: " or
	            "Error generating description: ". Errors are reported in the
	            return value rather than raised.
	        """
	        prompt = (
	            f"Analyze this product image and generate an SEO-optimized e-commerce product listing in {lang}. "
	            f"Tone: {tone}. Respond ONLY with valid JSON (no markdown formatting) containing these exact keys: "
	            "'title', 'description', 'features', 'tags'. "
	            "The 'features' and 'tags' must be arrays of strings. "
	            "Do not include any other text or formatting."
	        )

	        try:
	            # Configuration and model construction are inside the try so their
	            # failures are reported the same way as request failures.
	            genai.configure(api_key=os.getenv("SECRET_API_KEY"))
	            model = genai.GenerativeModel("gemini-2.0-flash-exp")
	            response = model.generate_content(
	                [
	                    {"inline_data": {"mime_type": "image/jpeg", "data": image_b64}},
	                    prompt
	                ]
	            )
	            text = self._strip_code_fence(response.text)
	        except Exception as err:
	            return "Error generating description: " + str(err)

	        # Parse only to validate; the raw JSON text is what gets returned.
	        try:
	            json.loads(text)
	        except json.JSONDecodeError:
	            return "Invalid JSON response: " + text

	        print("Successfully parsed JSON response")
	        return text

	    def choose_image(self, number: int):
	        """Select enhanced image 1, 2 or 3 as ``chosen_image``.

	        Raises:
	            ValueError: if ``number`` is not 1, 2 or 3.
	        """
	        if number == 1:
	            self.chosen_image = self.enhanced_image_1
	        elif number == 2:
	            self.chosen_image = self.enhanced_image_2
	        elif number == 3:
	            self.chosen_image = self.enhanced_image_3
	        else:
	            raise ValueError("Invalid image number. Choose 1, 2, or 3.")

	    @staticmethod
	    def _flatten_to_rgb(image):
	        """Return ``image`` as RGB, compositing any transparency onto white."""
	        has_alpha = image.mode in ("RGBA", "LA") or (
	            image.mode == "P" and "transparency" in image.info
	        )
	        if has_alpha:
	            rgba = image if image.mode == "RGBA" else image.convert("RGBA")
	            background = Image.new('RGB', rgba.size, (255, 255, 255))
	            background.paste(rgba, mask=rgba.split()[-1])  # Use alpha channel as mask
	            return background
	        if image.mode != 'RGB':
	            return image.convert('RGB')
	        return image

	    def generate_description(self):
	        """Generate a listing for ``chosen_image`` and store it in ``description``.

	        Returns:
	            The description text, or an error message string when no image is
	            chosen or generation fails. Failures are not raised.
	        """
	        print("Starting description generation...")

	        if self.chosen_image is None:
	            print("Error: No image chosen for description generation")
	            self.description = "Error: No image selected for description generation"
	            return self.description

	        try:
	            print("Converting image to base64...")
	            from io import BytesIO
	            buffer = BytesIO()

	            # JPEG has no alpha channel, so transparency is flattened first.
	            image_to_save = self._flatten_to_rgb(self.chosen_image)
	            image_to_save.save(buffer, format='JPEG', quality=95)
	            img_b64 = base64.b64encode(buffer.getvalue()).decode()
	            print(f"Image converted to base64, size: {len(img_b64)} characters")

	            tone = "professional"
	            lang = "en"
	            self.description = self.generate_description_from_image(img_b64, tone, lang)

	            if len(self.description) > self.MAX_DESCRIPTION_CHARS:
	                self.description = self.description[:self.MAX_DESCRIPTION_CHARS] + "..."

	            return self.description
	        except Exception as e:
	            print(f"Error in generate_description: {str(e)}")
	            import traceback
	            traceback.print_exc()
	            self.description = f"Error generating description: {str(e)}"
	            return self.description

	    def process(self, image_path):
	        """Load the image at ``image_path`` into ``raw_image`` as RGB.

	        Relative paths are resolved against the directory containing this
	        script; absolute paths are used as given.
	        """
	        if os.path.isabs(image_path):
	            # If absolute path, use it directly
	            self.image_path = image_path
	        else:
	            # If relative path, join with script directory
	            script_dir = os.path.dirname(os.path.abspath(__file__))
	            self.image_path = os.path.join(script_dir, image_path)

	        # convert() yields an independent image, so the file handle can be
	        # closed as soon as the conversion is done.
	        with Image.open(self.image_path) as source:
	            self.raw_image = source.convert("RGB")

	    def get_enhanced_images(self):
	        """Return ``(enhanced_image_1, enhanced_image_2, enhanced_image_3)``."""
	        return self.enhanced_image_1, self.enhanced_image_2, self.enhanced_image_3

	    def get_description(self):
	        """Return the most recently stored description text."""
	        return self.description