import concurrent.futures

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# All three models are small enough to run on CPU.
device = torch.device("cpu")

def load_model(name):
    """Load a tokenizer/model pair and make sure padding is configured."""
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)
    # These checkpoints ship without a pad token, so reuse EOS for padding.
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id
    return tokenizer, model.to(device)

tokenizer1, model1 = load_model("Gensyn/Qwen2.5-0.5B-Instruct")
tokenizer2, model2 = load_model("tiiuae/falcon-rw-1b")
tokenizer3, model3 = load_model("microsoft/phi-1_5")

def generate_response(model, tokenizer, prompt):
    """Generate a single sampled completion for the given prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=100,  # cap the reply length rather than prompt + reply combined
        pad_token_id=tokenizer.pad_token_id,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def multi_agent_chat(user_input):
    # Query the three models concurrently; PyTorch releases the GIL during
    # inference, so threads overlap usefully even on CPU.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(generate_response, model1, tokenizer1, user_input),
            executor.submit(generate_response, model2, tokenizer2, user_input),
            executor.submit(generate_response, model3, tokenizer3, user_input),
        ]
        results = [f.result() for f in futures]
    # A list maps one-to-one onto the three output textboxes below.
    return results

interface = gr.Interface(
    fn=multi_agent_chat,
    inputs=gr.Textbox(lines=2, placeholder="Ask something..."),
    outputs=[
        gr.Textbox(label="Agent 1 (Gensyn/Qwen2.5-0.5B-Instruct)"),
        gr.Textbox(label="Agent 2 (tiiuae/falcon-rw-1b)"),
        gr.Textbox(label="Agent 3 (microsoft/phi-1_5)"),
    ],
    title="3-Agent AI Chatbot",
    description="Three GPT-style agents respond to your input in parallel!",
)

interface.launch()
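
Once the app is running (e.g. locally via python app.py), the same endpoint can be queried programmatically. A minimal sketch using the gradio_client package, assuming the default local address and the auto-generated /predict endpoint that gr.Interface exposes; the example prompt is purely illustrative:

from gradio_client import Client

# Connect to the running Gradio app (swap in the Space URL if hosted).
client = Client("http://127.0.0.1:7860/")

# The three output textboxes come back as a tuple of three strings.
agent1, agent2, agent3 = client.predict("Explain gradient descent briefly.", api_name="/predict")
print(agent1)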