euijinrnd
/

RoboticsDiffusionTransformer

Model card Files Files and versions

RoboticsDiffusionTransformer / encode_lang.py

euijinrnd's picture

Add files using upload-large-folder tool

9de9fbf verified 7 months ago

history blame contribute delete

1.81 kB

	import os

	import torch
	import yaml

	from models.multimodal_encoder.t5_encoder import T5Embedder


	# Index of the CUDA device to use; main() builds torch.device(f"cuda:{GPU}").
	GPU = 0
	# Model identifier passed to T5Embedder as `from_pretrained`.
	MODEL_PATH = "google/t5-v1_1-xxl"
	# YAML config file; main() reads config["dataset"]["tokenizer_max_length"] from it.
	CONFIG_PATH = "configs/base.yaml"
	# Directory that receives the output file "<TASK_NAME>.pt".
	SAVE_DIR = "lang_embed/"

	# Modify this to your task name and instruction.
	# TASK_NAME becomes the output file name and the "name" entry of the saved dict;
	# INSTRUCTION is the text that gets tokenized and encoded.
	TASK_NAME = "anubis_carrot_to_bag"
	# Alternative example instructions (disabled):
	# INSTRUCTION = "take the towel off the kirby doll"
	# INSTRUCTION = "insert the brush to the dustpan"
	INSTRUCTION = "pick up the carrot and put into the bag"

	# Note: if your GPU VRAM is less than 24GB,
	# it is recommended to enable offloading by specifying an offload directory
	# here and uncommenting the `use_offload_folder` argument in main().
	# OFFLOAD_DIR = '/home/jellyho/OFFLOAD' # Specify your offload directory here, ensuring the directory exists.

	def main():
	    """Encode ``INSTRUCTION`` with the T5 text encoder and save the result.

	    Steps:
	      1. Read ``dataset.tokenizer_max_length`` from the YAML file at
	         ``CONFIG_PATH``.
	      2. Make sure the directory of the output file exists.
	      3. Build a ``T5Embedder`` from ``MODEL_PATH`` on ``cuda:{GPU}``.
	      4. Tokenize ``INSTRUCTION`` and run the encoder without gradient
	         tracking.
	      5. Save a dict with the keys ``"name"``, ``"instruction"`` and
	         ``"embeddings"`` to ``SAVE_DIR/TASK_NAME.pt`` and print a summary.

	    Raises:
	        OSError: if the config file cannot be opened or the output
	            directory cannot be created.
	        KeyError: if the config lacks ``dataset.tokenizer_max_length``.
	    """
	    with open(CONFIG_PATH, "r") as fp:
	        config = yaml.safe_load(fp)

	    # Resolve the output location before the expensive model load:
	    # torch.save() does not create missing parent directories, so a missing
	    # SAVE_DIR would otherwise only fail after the encoder has already run.
	    save_path = os.path.join(SAVE_DIR, f"{TASK_NAME}.pt")
	    save_dir = os.path.dirname(save_path)
	    if save_dir:  # an empty SAVE_DIR means "current directory"; nothing to create
	        os.makedirs(save_dir, exist_ok=True)

	    device = torch.device(f"cuda:{GPU}")
	    text_embedder = T5Embedder(
	        from_pretrained=MODEL_PATH,
	        model_max_length=config["dataset"]["tokenizer_max_length"],
	        device=device,
	        # use_offload_folder=OFFLOAD_DIR
	    )
	    tokenizer, text_encoder = text_embedder.tokenizer, text_embedder.model

	    tokens = tokenizer(
	        INSTRUCTION,
	        return_tensors="pt",
	        padding="longest",
	        truncation=True,
	    )["input_ids"].to(device)

	    # Force an explicit (1, seq_len) batch layout for the single instruction.
	    tokens = tokens.view(1, -1)
	    with torch.no_grad():
	        # Move the hidden states to the CPU so the saved file can be loaded
	        # without a GPU.
	        pred = text_encoder(tokens).last_hidden_state.detach().cpu()

	    # We save the embeddings in a dictionary format
	    torch.save(
	        {
	            "name": TASK_NAME,
	            "instruction": INSTRUCTION,
	            "embeddings": pred,
	        },
	        save_path,
	    )

	    print(f'\"{INSTRUCTION}\" from \"{TASK_NAME}\" is encoded by \"{MODEL_PATH}\" into shape {pred.shape} and saved to \"{save_path}\"')


	# Run the encoding only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()