Remove shuttle views; instead, show STX EQV and Cell Coverage. Make NST the default and add the context length of each model
9d21cc4
import csv
import json
import os
import sys

from config.model_metadata import MODELS


def normalize_model_name(csv_name: str) -> str:
    """Map a model name as it appears in the CSV to its display name in MODELS."""
    csv_name = csv_name.strip()
    name_mapping = {
        "Kimi-K2-Instruct-0905": "Kimi K2 Instruct 0905",
        "DeepSeek-R1-0528": "DeepSeek R1-0528",
        "Qwen3-Coder-480B-A35B-Instruct": "Qwen3 Coder 480B A35B",
        "gpt-oss-120b": "gpt-oss-120b",
        "Qwen2.5-72B-Instruct": "Qwen2.5 72B",
        "Qwen2.5-Coder-32B-Instruct": "QwenCoder 2.5 32B",
        "Qwen2.5-14B-Instruct-1M": "Qwen2.5 14B 1M",
        "Qwen2.5-14B-Instruct": "Qwen2.5 14B",
        "Qwen2.5-7B-Instruct": "Qwen2.5 7B",
        "HaVen-CodeQwen": "HaVen-CodeQwen",
        "OriGen": "OriGen",
    }
    return name_mapping.get(csv_name, csv_name)
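
# Illustrative usage: normalize_model_name("Qwen2.5-72B-Instruct") returns
# "Qwen2.5 72B"; names without a mapping entry pass through unchanged.
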
def get_hf_model_context_length(model_hf_url: str) -> int:
    """Fetch a model's maximum context length from its Hugging Face config."""
    from transformers import AutoConfig

    model_id = model_hf_url.split("https://huggingface.co/")[-1]
    try:
        config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
        return config.max_position_embeddings
    except Exception:
        # Fallback for models whose context length is not exposed via config.json;
        # raises KeyError if the model has no hard-coded entry either.
        fallbacks = {"henryen/OriGen": 4096}
        return fallbacks[model_id]
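
# Illustrative: get_hf_model_context_length("https://huggingface.co/henryen/OriGen")
# resolves model_id "henryen/OriGen"; its config is not parseable, so the
# hard-coded fallback of 4096 is returned.
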
def get_model_metadata(model_key: str) -> tuple:
    """Get model metadata from the MODELS dictionary."""
    try:
        model_metadata = MODELS[model_key]
    except KeyError:
        print(f"Warning: Unknown model '{model_key}' - using default values")
        return (
            f"https://huggingface.co/{model_key}",  # placeholder URL
            None,  # unknown params
            None,  # unknown context length
            "General",  # default type
            "V4",  # current release
            "Dense",  # default architecture
        )
    try:
        model_context_length = get_hf_model_context_length(model_metadata.url)
    except Exception:
        print(f"Warning: Context length not available for model '{model_key}'")
        model_context_length = None
    return (
        model_metadata.url,
        model_metadata.params,
        model_context_length,
        model_metadata.model_type,
        model_metadata.release,
        model_metadata.model_arch,
    )
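
# Illustrative: a key missing from MODELS, e.g. get_model_metadata("some-new-model")
# (hypothetical name), yields the fallback tuple
# ("https://huggingface.co/some-new-model", None, None, "General", "V4", "Dense").
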
def parse_nst_results(csv_path: str) -> list[dict]:
    """Parse a NotSoTiny results CSV into a flat list of record dicts.

    The first row names the shuttle columns, the second row the metric measured
    in each column; every following row holds one model's scores.
    """
    dataset = []
    shuttle = ""
    with open(csv_path, newline="", encoding="utf-8-sig") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        header = next(reader)
        tt_columns = header[1:]  # drop the leading model-name column
        metrics = next(reader)[1:]  # same for the metric row
        print(f"Shuttle columns: {tt_columns}")
        print(f"Metrics: {metrics}")
        for row in reader:
            if not row or all(not cell.strip() for cell in row):
                continue
            model_csv = row[0].strip()
            if not model_csv:
                continue
            model = normalize_model_name(model_csv)
            url, params, context, model_type, release, reasoning = get_model_metadata(model)
            scores = row[1:]
            for tt_column, metric, score_str in zip(tt_columns, metrics, scores):
                # Shuttle names appear only above the first metric column of each
                # group; carry the last seen name forward across blank cells.
                shuttle = tt_column if tt_column != "" else shuttle
                if not score_str.strip():
                    continue
                try:
                    # Scores may use a decimal comma; normalize before parsing.
                    score = float(score_str.replace(",", "."))
                except ValueError:
                    print(f"Warning: Could not parse score '{score_str}' for {model} - {tt_column}")
                    continue
                record = {
                    "Model": model,
                    "Model Type": model_type,
                    "Benchmark": "NotSoTiny-25-12",
                    "Shuttle": shuttle,
                    "Metric": metric,
                    "Result": score,
                    "Model URL": url,
                    "Params": params,
                    "Context Length": context,
                    "Release": release,
                    "Thinking": reasoning,
                }
                dataset.append(record)
    print(f"Parsed {len(dataset)} records from {csv_path}")
    return dataset
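
# Assumed CSV shape (values and shuttle names hypothetical; the metric names
# follow the commit message):
#
#   Model,ShuttleA,,ShuttleB,
#   ,STX EQV,Cell Coverage,STX EQV,Cell Coverage
#   Qwen2.5-72B-Instruct,"0,82","0,75",,"0,69"
#
# Blank header cells inherit the shuttle name to their left, and decimal commas
# are converted before float parsing.
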
def write_json(data: list, path: str):
    """Write data to a JSON file."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    print(f"Successfully wrote {len(data)} records to {path}")
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python -m results.parse_nst <path_to_NST.csv>")
        print("Example: python -m results.parse_nst results/results_NST-YY-MM.csv")
        sys.exit(1)

    csv_path = sys.argv[1]
    if not os.path.exists(csv_path):
        print(f"Error: File not found at {csv_path}")
        sys.exit(1)

    json_path = os.path.splitext(csv_path)[0] + ".json"
    print(f"Parsing {csv_path}...")
    parsed_data = parse_nst_results(csv_path)
    write_json(parsed_data, json_path)
    print("Done!")