Exploring the Effectiveness of Instruction Tuning in Biomedical Language Processing
Paper: arXiv:2401.00579
This research-purpose model is a fine-tuned version of Qwen/Qwen3-8B for biomedical NLP tasks, trained with instruction tuning using QLoRA. It is optimized for Named Entity Recognition (NER), Relation Extraction (RE), Natural Language Inference (NLI), and Document Classification in the medical domain.
📚 GitHub Repository: BioMedical-LLM
This work reproduces and extends "Exploring the Effectiveness of Instruction Tuning in Biomedical Language Processing" by adapting the approach to Qwen3-8B (replacing the original Llama 2 base) and leveraging Qwen3's reasoning capabilities.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-8B",
    device_map="auto",
    trust_remote_code=True
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen3-8B",
    trust_remote_code=True
)

# Load LoRA adapters
model = PeftModel.from_pretrained(
    base_model,
    "pkhare/qwen3-8b-biomedical"
)
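Optionally, the LoRA weights can be merged into the base model with PEFT's merge_and_unload, which removes the adapter indirection for slightly faster inference (this assumes enough memory to hold the merged weights):

# Merge adapter weights into the base model and drop the PEFT wrapper
model = model.merge_and_unload()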
# Example: Named Entity Recognition
prompt = "Extract disease entities from: Patient diagnosed with diabetes and hypertension."
messages = [
    {"role": "system", "content": "You are a helpful medical AI assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    top_k=20
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
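Note that decoding outputs[0] echoes the prompt along with the answer. To print only the model's completion, slice off the prompt tokens before decoding:

# Keep only the newly generated tokens
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[-1]:],
    skip_special_tokens=True
)
print(response)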
from transformers import pipeline

# Initialize pipeline
generator = pipeline(
    "text-generation",
    model="pkhare/qwen3-8b-biomedical",
    device="cuda"
)

# Named Entity Recognition
ner_prompt = "Extract disease entities from: Patient diagnosed with diabetes mellitus and hypertension."
output = generator(
    [{"role": "user", "content": ner_prompt}],
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    return_full_text=False
)[0]
print(output["generated_text"])
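The pipeline also accepts a list of conversations, which is convenient for running several inputs in one call (a sketch; the example sentences are illustrative, and throughput depends on your transformers version and hardware):

# Each conversation is a list of chat messages; the pipeline
# returns one result list per conversation
prompts = [
    "Extract disease entities from: The patient has asthma and obesity.",
    "Extract disease entities from: History of chronic kidney disease.",
]
results = generator(
    [[{"role": "user", "content": p}] for p in prompts],
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    return_full_text=False
)
for result in results:
    print(result[0]["generated_text"])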
prompt = """Extract disease entities from the following text:
Patient diagnosed with diabetes mellitus and hypertension.
Format: List each disease on a new line."""
messages = [
{"role": "system", "content": "You are a helpful medical AI assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(
**inputs,
max_new_tokens=128,
do_sample=True,
temperature=0.7,
top_p=0.8,
top_k=20
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
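Because the prompt asks for one disease per line, the completion can be parsed into a Python list (a sketch that assumes the model follows the requested format):

# Decode only the completion, then split it into one entity per line
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[-1]:],
    skip_special_tokens=True
)
entities = [line.strip("-* ").strip() for line in response.splitlines() if line.strip()]
print(entities)  # e.g. ['diabetes mellitus', 'hypertension']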
prompt = """Given the following sentence, identify the relation between the treatment and problem:
"The patient was treated with metformin for type 2 diabetes."
Possible relations: TrIP (Treatment improves Problem), TeRP (Test reveals Problem),
PIP (Problem indicates Problem), TrAP (Treatment causes Problem),
TrCP (Treatment conducted for Problem), TrWP (Treatment worsens Problem),
TrNAP (Treatment not administered because of Problem),
TeCP (Test conducted to investigate Problem), No Relations
Answer:"""
messages = [
{"role": "system", "content": "You are a helpful medical AI assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(
**inputs,
max_new_tokens=64,
do_sample=True,
temperature=0.7,
top_p=0.8,
top_k=20
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
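The model is also tuned for NLI and document classification. A minimal NLI sketch following the same chat pattern (the prompt wording and label set here are illustrative assumptions, not taken from the training data; greedy decoding is used because the expected output is a single label):

# Example: Natural Language Inference (illustrative prompt; labels are assumptions)
prompt = """Determine the relationship between the premise and the hypothesis.
Premise: The patient was started on insulin after oral agents failed to control blood glucose.
Hypothesis: The patient has diabetes.
Possible answers: entailment, contradiction, neutral
Answer:"""
messages = [
    {"role": "system", "content": "You are a helpful medical AI assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to("cuda")
# Greedy decoding for a deterministic, single-label answer
outputs = model.generate(**inputs, max_new_tokens=16, do_sample=False)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))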
The code for this work is available in the BioMedical-LLM repository.
This work is based on the original research by Rohanian et al.:
@article{rohanian2024exploring,
  title     = {Exploring the Effectiveness of Instruction Tuning in Biomedical Language Processing},
  author    = {Rohanian, Omid and Nouriborji, Mohammadmahdi and Kouchaki, Samaneh and Nooralahzadeh, Farhad and Clifton, Lei and Clifton, David A.},
  journal   = {Artificial Intelligence in Medicine},
  volume    = {158},
  pages     = {103007},
  year      = {2024},
  publisher = {Elsevier},
  doi       = {10.1016/j.artmed.2024.103007},
  url       = {https://www.sciencedirect.com/science/article/pii/S0933365724002495},
  issn      = {0933-3657}
}