import gradio as gr
import torch
from diffusers import DiffusionPipeline, QwenImageEditPipeline
from diffusers.quantizers import PipelineQuantizationConfig
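# Assumed environment (the original Space did not pin versions): a diffusers release
# with Qwen-Image support (0.35+), plus transformers, accelerate, bitsandbytes, and
# gradio. bitsandbytes 4-bit quantization only applies on CUDA; on CPU the pipelines
# simply load in bfloat16.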
# Global variables for each model pipeline
gen_pipe = None
edit_pipe = None

# --- Model Loading Functions ---
def setup_generation_model():
    """
    Loads the Qwen/Qwen-Image text-to-image pipeline, 4-bit quantized with bitsandbytes.
    """
    global gen_pipe
    if gen_pipe is not None:
        return "Generation Model already loaded. ✨"
    model_id = "Qwen/Qwen-Image"
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading Qwen-Image generation model on {device} with bitsandbytes quantization...")
    try:
        # Qwen-Image is a diffusion model, so it is loaded through DiffusionPipeline
        # rather than AutoModelForCausalLM. Quantize the heavy components (the MMDiT
        # transformer and the Qwen2.5-VL text encoder) to 4-bit NF4 with double
        # quantization; compute runs in bfloat16.
        quant_config = PipelineQuantizationConfig(
            quant_backend="bitsandbytes_4bit",
            quant_kwargs={
                "load_in_4bit": True,
                "bnb_4bit_quant_type": "nf4",
                "bnb_4bit_compute_dtype": torch.bfloat16,
                "bnb_4bit_use_double_quant": True,
            },
            components_to_quantize=["transformer", "text_encoder"],
        )
        gen_pipe = DiffusionPipeline.from_pretrained(
            model_id,
            quantization_config=quant_config if device == "cuda" else None,
            torch_dtype=torch.bfloat16,
            use_safetensors=True,
        )
        gen_pipe.to(device)
        print("Qwen-Image generation model loaded and quantized successfully.")
        return "Generation Model loaded! 🚀"
    except Exception as e:
        gen_pipe = None
        return f"Generation Model setup failed. Error: {e}"
def setup_editing_model():
    """
    Loads the Qwen/Qwen-Image-Edit pipeline for instruction-based image editing,
    4-bit quantized with bitsandbytes.
    """
    global edit_pipe
    if edit_pipe is not None:
        return "Editing Model already loaded. ✨"
    model_id = "Qwen/Qwen-Image-Edit"
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading Qwen-Image-Edit model on {device} with bitsandbytes quantization...")
    try:
        # PipelineQuantizationConfig is applied per component, so the same NF4 setup
        # used for the generation pipeline works for the editing pipeline too.
        quant_config = PipelineQuantizationConfig(
            quant_backend="bitsandbytes_4bit",
            quant_kwargs={
                "load_in_4bit": True,
                "bnb_4bit_quant_type": "nf4",
                "bnb_4bit_compute_dtype": torch.bfloat16,
                "bnb_4bit_use_double_quant": True,
            },
            components_to_quantize=["transformer", "text_encoder"],
        )
        edit_pipe = QwenImageEditPipeline.from_pretrained(
            model_id,
            quantization_config=quant_config if device == "cuda" else None,
            torch_dtype=torch.bfloat16,
            use_safetensors=True,
        )
        edit_pipe.to(device)
        print("Qwen-Image-Edit model loaded successfully.")
        return "Editing Model loaded! ✏️"
    except Exception as e:
        edit_pipe = None
        return f"Editing Model setup failed. Error: {e}"
# --- Generation and Editing Functions ---
def generate_image(prompt, negative_prompt, num_inference_steps, guidance_scale, seed):
    global gen_pipe
    if gen_pipe is None:
        return None, "Model not loaded.", ""
    generator = torch.Generator(device=gen_pipe.device).manual_seed(seed) if seed != -1 else None
    try:
        # Qwen-Image applies classifier-free guidance (and hence the negative prompt)
        # via true_cfg_scale, so the UI's guidance slider is mapped onto it.
        image = gen_pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
            true_cfg_scale=guidance_scale,
            generator=generator,
        ).images[0]
        return image, "Image generated successfully!", ""
    except Exception as e:
        return None, "An error occurred during image generation.", f"Error: {e}"
def edit_image(input_image_pil, prompt, negative_prompt, num_inference_steps, guidance_scale, true_cfg_scale, seed):
    global edit_pipe
    if edit_pipe is None:
        return None, "Model not loaded.", ""
    if input_image_pil is None:
        return None, "Please upload an image.", ""
    generator = torch.Generator(device=edit_pipe.device).manual_seed(seed) if seed != -1 else None
    try:
        # QwenImageEditPipeline has no denoising-strength parameter (that belongs to
        # the img2img pipeline), so the edit is always a full diffusion pass.
        edited_image = edit_pipe(
            image=input_image_pil.convert("RGB"),
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            true_cfg_scale=true_cfg_scale,
            generator=generator,
        ).images[0]
        return edited_image, "Image edited successfully!", ""
    except Exception as e:
        return None, "An error occurred during image editing.", f"Error: {e}"
# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎨 Qwen Image Studio: Generation & Editing")
    gr.Markdown("Explore the power of Qwen models for advanced image generation and detailed editing.")
    with gr.Tab("Image Generation (Qwen/Qwen-Image)"):
        gr.Markdown("### Text-to-Image Generation")
        gr.Markdown("Create new images from text prompts.")
        with gr.Row():
            gen_model_status = gr.Textbox(value="Generation Model not loaded. Click 'Load' to begin.", interactive=False, label="Model Status")
            load_gen_button = gr.Button("Load Generation Model", variant="primary")
        load_gen_button.click(fn=setup_generation_model, outputs=gen_model_status)
        with gr.Column():
            gen_prompt = gr.Textbox(label="Prompt", placeholder="A majestic dragon flying over a futuristic city at sunset, highly detailed, photorealistic", lines=2)
            gen_negative_prompt = gr.Textbox(label="Negative Prompt (Optional)", placeholder="blurry, low quality, distorted, bad anatomy", lines=1)
            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    gen_num_steps = gr.Slider(minimum=10, maximum=150, step=1, value=50, label="Inference Steps")
                    gen_guidance_scale = gr.Slider(minimum=1.0, maximum=20.0, step=0.5, value=7.5, label="Guidance Scale")
                with gr.Row():
                    gen_seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
            generate_button = gr.Button("Generate Image", variant="secondary")
            gen_output_image = gr.Image(label="Generated Image")
            gen_status_text = gr.Textbox(label="Status", interactive=False)
            gen_error_text = gr.Textbox(label="Error Details", interactive=False, visible=False)
        generate_button.click(
            fn=generate_image,
            inputs=[gen_prompt, gen_negative_prompt, gen_num_steps, gen_guidance_scale, gen_seed],
            outputs=[gen_output_image, gen_status_text, gen_error_text]
        )
| with gr.Tab("Image Editing (Qwen/Qwen-Image-Edit)"): | |
| gr.Markdown("### Image-to-Image Editing") | |
| gr.Markdown("Upload an image and provide a text prompt to transform it. This model excels at semantic and appearance editing.") | |
| with gr.Row(): | |
| edit_model_status = gr.Textbox(value="Editing Model not loaded. Click 'Load' to begin.", interactive=False, label="Model Status") | |
| load_edit_button = gr.Button("Load Editing Model", variant="primary") | |
| load_edit_button.click(fn=setup_editing_model, outputs=edit_model_status) | |
| with gr.Column(): | |
| edit_input_image = gr.Image(label="Upload Image to Edit", type="pil") | |
| edit_prompt = gr.Textbox(label="Edit Prompt", placeholder="Change the dog's fur to a vibrant blue and add a red collar", lines=2) | |
| edit_negative_prompt = gr.Textbox(label="Negative Prompt (Optional)", placeholder="blurry, low quality, distorted, messy", lines=1) | |
| with gr.Accordion("Advanced Settings", open=False): | |
| with gr.Row(): | |
| edit_num_steps = gr.Slider(minimum=10, maximum=150, step=1, value=50, label="Inference Steps") | |
| edit_guidance_scale = gr.Slider(minimum=1.0, maximum=20.0, step=0.5, value=7.5, label="Guidance Scale") | |
| with gr.Row(): | |
| edit_true_cfg_scale = gr.Slider(minimum=1.0, maximum=10.0, step=0.1, value=4.0, label="True CFG Scale (for more precise control)") | |
| edit_denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.8, label="Denoising Strength (how much to change original)") | |
| with gr.Row(): | |
| edit_seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0) | |
| edit_button = gr.Button("Edit Image", variant="secondary") | |
| edit_output_image = gr.Image(label="Edited Image") | |
| edit_status_text = gr.Textbox(label="Status", interactive=False) | |
| edit_error_text = gr.Textbox(label="Error Details", interactive=False, visible=False) | |
| edit_button.click( | |
| fn=edit_image, | |
| inputs=[edit_input_image, edit_prompt, edit_negative_prompt, edit_num_steps, edit_guidance_scale, edit_true_cfg_scale, edit_denoising_strength, edit_seed], | |
| outputs=[edit_output_image, edit_status_text, edit_error_text] | |
| ) | |
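# Diffusion calls can take minutes; Gradio's request queue keeps them from timing out.
demo.queue()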
# Launch the app
demo.launch(inbrowser=True, share=False)