from transformers import OwlViTProcessor, OwlViTForObjectDetection
from PIL import Image
import torch
from rembg import remove
import os
import cv2
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter
from gradio_client import Client, handle_file
import json
import google.generativeai as genai
import base64
import image_enhancement_option3_helper
from dotenv import load_dotenv

load_dotenv()

class process_image:
    def __init__(self):
        """Create an empty pipeline state; each stage fills in its own slot."""
        # Input: the resolved file path and the image loaded from it.
        self.image_path = self.raw_image = None

        # Label indices reported by the detector.
        self.detected_objects = []

        # Intermediate results of cropping and background removal.
        self.cropped_image = self.no_background_image = None

        # One result slot per enhancement option.
        self.enhanced_image_1 = self.enhanced_image_2 = self.enhanced_image_3 = None

        # The enhanced image picked for description, and the generated text.
        self.chosen_image = None
        self.description = ""

    def detect_object(self):
        """Detect product-like objects with OWL-ViT and crop the image to them.

        Reads ``self.raw_image`` and writes:

        * ``self.detected_objects`` -- the label indices (positions in the
          query list below) of every detection kept by post-processing.
        * ``self.cropped_image`` -- a crop around the detection(s), or the
          full image when nothing usable was detected or when more than
          three unrelated objects were found.

        Raises:
            ValueError: if no image has been loaded yet.
        """
        if self.raw_image is None:
            raise ValueError("No image loaded. Call process() before detect_object().")

        processor, model = self._load_detector()
        texts = [[
            # Clothing
            "clothing",
            "topwear",
            "bottomwear",
            "outerwear",
            "apparel",
            "sportswear",
            "uniform",
            "underwear",
            "dress",
            "outfit",

            # Footwear
            "footwear",
            "shoes",
            "boots",
            "sneakers",

            # Accessories
            "accessory",
            "bag",
            "backpack",
            "handbag",
            "wallet",
            "belt",
            "hat",
            "cap",
            "scarf",
            "glasses",
            "watch",
            "jewelry",

            # Electronics
            "electronics",
            "device",
            "gadget",
            "smartphone",
            "laptop",
            "tablet",
            "headphones",
            "smartwatch",

            # Cosmetics / personal care
            "cosmetics",
            "beauty product",
            "skincare",
            "makeup",
            "perfume",
            "hair product",

            # Baby and kids
            "baby product",
            "baby clothes",
            "toy",
            "stroller",
            "pacifier",

            # Home and living
            "home item",
            "furniture",
            "appliance",
            "decor",
            "kitchenware",
            "bedding",
            "cleaning tool",

            # Sports and outdoor
            "sports gear",
            "fitness equipment",
            "gym accessory",
            "camping gear",
            "bicycle equipment",
        ]]

        inputs = processor(text=texts, images=self.raw_image, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**inputs)

        # PIL reports (width, height); the post-processor takes (height, width).
        target_sizes = torch.tensor([self.raw_image.size[::-1]])
        results = processor.post_process_grounded_object_detection(
            outputs=outputs,
            target_sizes=target_sizes,
            threshold=0.2,
        )[0]
        self.detected_objects = results["labels"].tolist()

        # Post-processing already dropped everything scoring below the
        # threshold above, so no second (lower) score filter is needed.
        valid_boxes = [box.tolist() for box in results["boxes"]]
        detected_labels = [texts[0][int(label_id)] for label_id in results["labels"]]

        if not valid_boxes:
            self.cropped_image = self.raw_image
            return

        image_size = self.raw_image.size
        if len(valid_boxes) == 1:
            crop_box = self._clamp_box(valid_boxes[0], image_size)
            self.cropped_image = (
                self.raw_image.crop(crop_box) if crop_box else self.raw_image
            )
            print(f"Single object detected: {detected_labels[0]}")
            return

        # Several detections: items that come in pairs/sets and clothing are
        # treated as one product, as is any group of at most three boxes.
        similar_items = ('shoes', 'boots', 'sneakers', 'footwear', 'glasses', 'earrings',
                         'gloves', 'socks', 'jewelry', 'watch', 'bracelet')
        clothing_items = ('clothing', 'topwear', 'bottomwear', 'dress', 'outfit', 'apparel')

        has_similar_items = any(
            item in label.lower() for label in detected_labels for item in similar_items
        )
        has_clothing_items = any(
            item in label.lower() for label in detected_labels for item in clothing_items
        )

        if not (has_similar_items or has_clothing_items or len(valid_boxes) <= 3):
            # Too many different objects: keep the whole image.
            self.cropped_image = self.raw_image
            return

        # Crop to the smallest rectangle that contains every detection.
        union_box = (
            min(box[0] for box in valid_boxes),
            min(box[1] for box in valid_boxes),
            max(box[2] for box in valid_boxes),
            max(box[3] for box in valid_boxes),
        )
        crop_box = self._clamp_box(union_box, image_size)
        self.cropped_image = (
            self.raw_image.crop(crop_box) if crop_box else self.raw_image
        )

    def _load_detector(self):
        """Return the (processor, model) pair, loading it once per process."""
        owner = type(self)
        cached = getattr(owner, "_owlvit_detector", None)
        if cached is None:
            checkpoint = "google/owlvit-base-patch32"
            cached = (
                OwlViTProcessor.from_pretrained(checkpoint),
                OwlViTForObjectDetection.from_pretrained(checkpoint),
            )
            # Loading the checkpoint is expensive; share it between calls.
            owner._owlvit_detector = cached
        return cached

    @staticmethod
    def _clamp_box(box, image_size):
        """Round ``box`` to whole pixels and clip it to the image.

        Returns ``(xmin, ymin, xmax, ymax)``, or ``None`` when the clipped box
        has no area (so the caller can fall back to the full image).
        """
        width, height = image_size
        xmin, ymin, xmax, ymax = (int(round(value)) for value in box)
        xmin, ymin = max(0, xmin), max(0, ymin)
        xmax, ymax = min(width, xmax), min(height, ymax)
        if xmax <= xmin or ymax <= ymin:
            return None
        return xmin, ymin, xmax, ymax
        
    def remove_background(self):
        """Strip the background from the cropped image.

        When no crop exists yet, the full raw image is adopted as the crop
        (and a notice is printed). The result of ``remove`` is stored in
        ``self.no_background_image``.
        """
        source = self.cropped_image
        if source is None:
            print("No cropped image available. Using entire image.")
            source = self.cropped_image = self.raw_image

        self.no_background_image = remove(source)

    def enhance_image_option1(self):
        """Apply a light local enhancement pass and upscale the image by 1.5x.

        Pipeline: unsharp mask -> small contrast / brightness / colour boosts
        -> bilateral denoise -> LANCZOS resize. The colour channels are
        processed on their own and an existing alpha channel is re-attached
        afterwards, so an image with transparency stays transparent.

        Returns:
            The enhanced image, which is also stored in
            ``self.enhanced_image_1``.

        Raises:
            ValueError: if ``self.no_background_image`` has not been set.
        """
        if self.no_background_image is None:
            raise ValueError(
                "No background-removed image available. Call remove_background() first."
            )

        source = self.no_background_image
        # cv2.cvtColor(..., COLOR_RGB2BGR) silently discards a fourth channel,
        # which would throw the transparency away. Set the alpha channel
        # aside and run the colour pipeline on plain RGB.
        alpha = source.getchannel("A") if "A" in source.getbands() else None
        rgb = source.convert("RGB")

        sharpened = rgb.filter(ImageFilter.UnsharpMask(
            radius=1,
            percent=120,
            threshold=1
        ))

        contrast_enhanced = ImageEnhance.Contrast(sharpened).enhance(1.1)  # 10% more contrast
        brightness_enhanced = ImageEnhance.Brightness(contrast_enhanced).enhance(1.02)  # 2% brighter
        color_enhanced = ImageEnhance.Color(brightness_enhanced).enhance(1.05)  # 5% more vibrant

        # Denoise with OpenCV (BGR channel order), then convert back for PIL.
        img_bgr = cv2.cvtColor(np.array(color_enhanced), cv2.COLOR_RGB2BGR)
        denoised = cv2.bilateralFilter(img_bgr, 3, 10, 10)
        enhanced = Image.fromarray(cv2.cvtColor(denoised, cv2.COLOR_BGR2RGB))

        scale = 1.5
        new_size = (int(enhanced.width * scale), int(enhanced.height * scale))
        enhanced = enhanced.resize(new_size, Image.Resampling.LANCZOS)

        if alpha is not None:
            # Scale the mask the same way so it lines up with the colours.
            enhanced.putalpha(alpha.resize(new_size, Image.Resampling.LANCZOS))

        self.enhanced_image_1 = enhanced
        return self.enhanced_image_1

    def enhance_image_option2(self):
        """Enhance the image remotely via ``finegrain/finegrain-image-enhancer``.

        The background-removed image is written to a private temporary
        directory, sent to the gradio app's ``/process`` endpoint, and the
        returned file is loaded into ``self.enhanced_image_2``. The temporary
        upload is removed afterwards instead of being left next to the source
        file under a fixed name.

        Returns:
            The enhanced image, which is also stored in
            ``self.enhanced_image_2``.

        Raises:
            ValueError: if ``self.no_background_image`` has not been set.
        """
        import tempfile

        if self.no_background_image is None:
            raise ValueError(
                "No background-removed image available. Call remove_background() first."
            )

        client = Client("finegrain/finegrain-image-enhancer")

        # A per-call directory avoids clashes between concurrent runs and
        # does not require the source directory to be writable.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_image_path = os.path.join(temp_dir, "temp_image.png")
            self.no_background_image.save(temp_image_path)

            result = client.predict(
                input_image=handle_file(temp_image_path),
                prompt="",
                negative_prompt="",
                seed=0,
                upscale_factor=2.6,
                controlnet_scale=0.5,
                controlnet_decay=0.6,
                condition_scale=5,
                tile_width=200,
                tile_height=200,
                denoise_strength=0,
                num_inference_steps=23,
                solver="DPMSolver",
                api_name="/process"
            )

        # result[1] is a local file path, not a URL.
        image_path = result[1]

        # Read the pixels now and release the file handle, so the image does
        # not depend on the downloaded file staying around.
        with Image.open(image_path) as enhanced:
            enhanced.load()

        self.enhanced_image_2 = enhanced
        return self.enhanced_image_2
    
    def enhance_image_option3(self):
        """Enhance the image with the option-3 helper module.

        Passes ``self.no_background_image`` to the helper's
        ``ai_enhanced_image_processing`` and stores the result in
        ``self.enhanced_image_3``.

        Returns:
            The enhanced image, matching what options 1 and 2 return.
        """
        # NOTE(review): the helper is built with model=None; presumably it
        # falls back to a default model -- confirm in the helper module.
        enhancer = image_enhancement_option3_helper.image_enhancement_option3_helper(model=None)
        self.enhanced_image_3 = enhancer.ai_enhanced_image_processing(self.no_background_image)
        return self.enhanced_image_3

    def generate_description_from_image(self, image_b64: str,
                                        tone: str = "professional",
                                        lang: str = "en") -> str:
        """Ask Gemini for a JSON product listing describing the image.

        Args:
            image_b64: Base64-encoded image data; it is sent with the
                ``image/jpeg`` MIME type.
            tone: Writing tone requested in the prompt.
            lang: Language requested in the prompt.

        Returns:
            The model's reply with any surrounding markdown code fence
            removed, when that reply parses as JSON. Otherwise a string
            starting with ``"Invalid JSON response: "`` (reply was not JSON)
            or ``"Error generating description: "`` (any other failure,
            including client setup). This method reports failures through
            the return value rather than by raising.
        """
        prompt = (
            f"Analyze this product image and generate an SEO-optimized e-commerce product listing in {lang}. "
            f"Tone: {tone}. Respond ONLY with valid JSON (no markdown formatting) containing these exact keys: "
            f"'title', 'description', 'features', 'tags'. "
            f"The 'features' and 'tags' must be arrays of strings. "
            f"Do not include any other text or formatting."
        )

        try:
            # Client setup is inside the try so that a configuration failure
            # is reported with the same error string as a failed request.
            genai.configure(api_key=os.getenv("SECRET_API_KEY"))
            model = genai.GenerativeModel("gemini-2.0-flash-exp")

            response = model.generate_content(
                [
                    {"inline_data": {"mime_type": "image/jpeg", "data": image_b64}},
                    prompt
                ]
            )
            text = response.text.strip()

            # Remove a wrapping markdown code fence, with or without a
            # "json" language tag (matched case-insensitively).
            if text.startswith("```"):
                text = text[3:]
                if text[:4].lower() == "json":
                    text = text[4:]
            if text.endswith("```"):
                text = text[:-3]

            text = text.strip()

            # Parse only to validate; the caller receives the JSON text.
            try:
                json.loads(text)
            except json.JSONDecodeError:
                return "Invalid JSON response: " + text

            print("Successfully parsed JSON response")
            return text
        except Exception as err:
            return "Error generating description: " + str(err)
    

    def choose_image(self, number: int):
        """Select enhancement result 1, 2 or 3 as ``self.chosen_image``.

        Raises:
            ValueError: if ``number`` is not 1, 2 or 3.
        """
        slots = ("enhanced_image_1", "enhanced_image_2", "enhanced_image_3")
        for position, attribute in enumerate(slots, start=1):
            if number == position:
                self.chosen_image = getattr(self, attribute)
                return

        raise ValueError("Invalid image number. Choose 1, 2, or 3.")
        

    def generate_description(self):
        """Generate a product description for ``self.chosen_image``.

        The chosen image is flattened onto a white background when it has
        transparency, encoded as a base64 JPEG and passed to
        ``generate_description_from_image``. The result is stored in
        ``self.description`` and returned.

        Returns:
            The description text, or an error message string when no image
            was chosen or when any step failed. This method does not raise.
        """
        print("Starting description generation...")

        if self.chosen_image is None:
            print("Error: No image chosen for description generation")
            self.description = "Error: No image selected for description generation"
            return self.description

        try:
            print("Converting image to base64...")
            from io import BytesIO
            buffer = BytesIO()

            # JPEG has no alpha channel. Flatten every kind of transparency
            # (RGBA, LA, PA, or a palette/greyscale image with a
            # "transparency" entry) onto white rather than letting a plain
            # convert('RGB') discard the alpha information.
            image_to_save = self.chosen_image
            has_alpha = (
                "A" in image_to_save.getbands()
                or "transparency" in image_to_save.info
            )
            if has_alpha:
                rgba = image_to_save.convert('RGBA')
                background = Image.new('RGB', rgba.size, (255, 255, 255))
                background.paste(rgba, mask=rgba.getchannel('A'))  # Use alpha channel as mask
                image_to_save = background
            elif image_to_save.mode != 'RGB':
                image_to_save = image_to_save.convert('RGB')

            image_to_save.save(buffer, format='JPEG', quality=95)
            img_b64 = base64.b64encode(buffer.getvalue()).decode()
            print(f"Image converted to base64, size: {len(img_b64)} characters")

            tone = "professional"
            lang = "en"
            self.description = self.generate_description_from_image(img_b64, tone, lang)

            # NOTE(review): cutting a JSON string at a fixed length can leave
            # it unparseable; presumably this guards a downstream size limit
            # -- confirm with the consumer of `description`.
            if len(self.description) > 15000:
                self.description = self.description[:15000] + "..."

            return self.description
        except Exception as e:
            print(f"Error in generate_description: {str(e)}")
            import traceback
            traceback.print_exc()
            self.description = f"Error generating description: {str(e)}"
            return self.description

    def process(self, image_path):
        """Load the image to work on into ``self.raw_image`` as RGB.

        Args:
            image_path: Absolute path, or a path relative to the directory
                that contains this source file.

        The resolved path is stored in ``self.image_path``.
        """
        if os.path.isabs(image_path):
            # If absolute path, use it directly
            self.image_path = image_path
        else:
            # If relative path, join with script directory
            script_dir = os.path.dirname(os.path.abspath(__file__))
            self.image_path = os.path.join(script_dir, image_path)

        # convert() returns an independent, fully loaded copy, so the file
        # can be closed as soon as the block ends instead of staying open.
        with Image.open(self.image_path) as opened:
            self.raw_image = opened.convert("RGB")

    def get_enhanced_images(self):
        """Return the three enhancement results as a tuple, in option order."""
        slots = ("enhanced_image_1", "enhanced_image_2", "enhanced_image_3")
        return tuple(getattr(self, slot) for slot in slots)
    
    def get_description(self):
        """Return the current value of ``self.description``."""
        current_description = self.description
        return current_description