GhostScientist committed on
Commit
9dd73f1
·
verified ·
1 Parent(s): 5a4b365

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +8 -4
  2. requirements.txt +1 -0
app.py CHANGED
@@ -2,17 +2,21 @@ import gradio as gr
2
  import spaces
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
5
 
6
  MODEL_ID = "GhostScientist/qwen25-coder-1.5b-codealpaca-sft"
 
7
 
8
- # Load tokenizer at startup
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
10
 
11
- # Load model at startup (will be moved to GPU when @spaces.GPU is called)
12
- model = AutoModelForCausalLM.from_pretrained(
13
- MODEL_ID,
14
  torch_dtype=torch.float16,
15
  )
 
 
16
 
17
  @spaces.GPU
18
  def generate_response(message, history, system_message, max_tokens, temperature, top_p):
 
2
  import spaces
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ from peft import PeftModel
6
 
7
  MODEL_ID = "GhostScientist/qwen25-coder-1.5b-codealpaca-sft"
8
+ BASE_MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
9
 
10
+ # Load tokenizer
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
12
 
13
+ # Load base model and apply adapter
14
+ base_model = AutoModelForCausalLM.from_pretrained(
15
+ BASE_MODEL_ID,
16
  torch_dtype=torch.float16,
17
  )
18
+ model = PeftModel.from_pretrained(base_model, MODEL_ID)
19
+ model = model.merge_and_unload() # Merge adapter for faster inference
20
 
21
  @spaces.GPU
22
  def generate_response(message, history, system_message, max_tokens, temperature, top_p):
requirements.txt CHANGED
@@ -2,3 +2,4 @@ gradio>=5.0.0
2
  torch
3
  transformers
4
  accelerate
 
 
2
  torch
3
  transformers
4
  accelerate
5
+ peft