Remove shuttle views; instead, show STX EQV and Cell Coverage. Make NST the default and add the context length of each model
9d21cc4
import csv
import json
import os
import sys

from config.model_metadata import MODELS


def normalize_model_name(csv_name: str) -> str:
    """Map a model name as it appears in the CSV to its display name in MODELS."""
    csv_name = csv_name.strip()
    name_mapping = {
        "Kimi-K2-Instruct-0905": "Kimi K2 Instruct 0905",
        "DeepSeek-R1-0528": "DeepSeek R1-0528",
        "Qwen3-Coder-480B-A35B-Instruct": "Qwen3 Coder 480B A35B",
        "gpt-oss-120b": "gpt-oss-120b",
        "Qwen2.5-72B-Instruct": "Qwen2.5 72B",
        "Qwen2.5-Coder-32B-Instruct": "QwenCoder 2.5 32B",
        "Qwen2.5-14B-Instruct-1M": "Qwen2.5 14B 1M",
        "Qwen2.5-14B-Instruct": "Qwen2.5 14B",
        "Qwen2.5-7B-Instruct": "Qwen2.5 7B",
        "HaVen-CodeQwen": "HaVen-CodeQwen",
        "OriGen": "OriGen",
    }
    return name_mapping.get(csv_name, csv_name)
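
# Illustrative usage: normalize_model_name("Qwen2.5-72B-Instruct") returns
# "Qwen2.5 72B"; names without a mapping entry pass through unchanged.
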
def get_hf_model_context_length(model_hf_url: str) -> int:
    """Fetch a model's maximum context length from its Hugging Face config."""
    from transformers import AutoConfig

    model_id = model_hf_url.split("https://huggingface.co/")[-1]
    try:
        config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
        return config.max_position_embeddings
    except Exception:
        # Fallback for models whose context length is not exposed via config.json;
        # raises KeyError if the model has no hard-coded entry either.
        fallbacks = {"henryen/OriGen": 4096}
        return fallbacks[model_id]
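
# Illustrative: get_hf_model_context_length("https://huggingface.co/henryen/OriGen")
# resolves model_id "henryen/OriGen"; its config is not parseable, so the
# hard-coded fallback of 4096 is returned.
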
def get_model_metadata(model_key: str) -> tuple:
    """Get model metadata from the MODELS dictionary."""
    try:
        model_metadata = MODELS[model_key]
    except KeyError:
        print(f"Warning: Unknown model '{model_key}' - using default values")
        return (
            f"https://huggingface.co/{model_key}",  # placeholder URL
            None,  # unknown params
            None,  # unknown context length
            "General",  # default type
            "V4",  # current release
            "Dense",  # default architecture
        )
    try:
        model_context_length = get_hf_model_context_length(model_metadata.url)
    except Exception:
        print(f"Warning: Context length not available for model '{model_key}'")
        model_context_length = None
    return (
        model_metadata.url,
        model_metadata.params,
        model_context_length,
        model_metadata.model_type,
        model_metadata.release,
        model_metadata.model_arch,
    )
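
# Illustrative: a key missing from MODELS, e.g. get_model_metadata("some-new-model")
# (hypothetical name), yields the fallback tuple
# ("https://huggingface.co/some-new-model", None, None, "General", "V4", "Dense").
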
def parse_nst_results(csv_path: str) -> list[dict]:
    """Parse a NotSoTiny results CSV into a flat list of record dicts.

    The first row names the shuttle columns, the second row the metric measured
    in each column; every following row holds one model's scores.
    """
    dataset = []
    shuttle = ""
    with open(csv_path, newline="", encoding="utf-8-sig") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        header = next(reader)
        tt_columns = header[1:]  # drop the leading model-name column
        metrics = next(reader)[1:]  # same for the metric row
        print(f"Shuttle columns: {tt_columns}")
        print(f"Metrics: {metrics}")
        for row in reader:
            if not row or all(not cell.strip() for cell in row):
                continue
            model_csv = row[0].strip()
            if not model_csv:
                continue
            model = normalize_model_name(model_csv)
            url, params, context, model_type, release, reasoning = get_model_metadata(model)
            scores = row[1:]
            for tt_column, metric, score_str in zip(tt_columns, metrics, scores):
                # Shuttle names appear only above the first metric column of each
                # group; carry the last seen name forward across blank cells.
                shuttle = tt_column if tt_column != "" else shuttle
                if not score_str.strip():
                    continue
                try:
                    # Scores may use a decimal comma; normalize before parsing.
                    score = float(score_str.replace(",", "."))
                except ValueError:
                    print(f"Warning: Could not parse score '{score_str}' for {model} - {tt_column}")
                    continue
                record = {
                    "Model": model,
                    "Model Type": model_type,
                    "Benchmark": "NotSoTiny-25-12",
                    "Shuttle": shuttle,
                    "Metric": metric,
                    "Result": score,
                    "Model URL": url,
                    "Params": params,
                    "Context Length": context,
                    "Release": release,
                    "Thinking": reasoning,
                }
                dataset.append(record)
    print(f"Parsed {len(dataset)} records from {csv_path}")
    return dataset
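
# Assumed CSV shape (values and shuttle names hypothetical; the metric names
# follow the commit message):
#
#   Model,ShuttleA,,ShuttleB,
#   ,STX EQV,Cell Coverage,STX EQV,Cell Coverage
#   Qwen2.5-72B-Instruct,"0,82","0,75",,"0,69"
#
# Blank header cells inherit the shuttle name to their left, and decimal commas
# are converted before float parsing.
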
def write_json(data: list, path: str):
    """Write data to a JSON file."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    print(f"Successfully wrote {len(data)} records to {path}")
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python -m results.parse_nst <path_to_NST.csv>")
        print("Example: python -m results.parse_nst results/results_NST-YY-MM.csv")
        sys.exit(1)

    csv_path = sys.argv[1]
    if not os.path.exists(csv_path):
        print(f"Error: File not found at {csv_path}")
        sys.exit(1)

    json_path = os.path.splitext(csv_path)[0] + ".json"
    print(f"Parsing {csv_path}...")
    parsed_data = parse_nst_results(csv_path)
    write_json(parsed_data, json_path)
    print("Done!")