| | import os |
| |
|
| | import torch |
| | import yaml |
| |
|
| | from models.multimodal_encoder.t5_encoder import T5Embedder |
| |
|
| |
|
# --- Task-specific settings: edit these for each new instruction -----------
# Identifier for the task; used as the output file stem and stored under the
# "name" key of the saved dictionary.
TASK_NAME = "anubis_carrot_to_bag"
# Natural-language instruction that gets tokenized and encoded.
INSTRUCTION = "pick up the carrot and put into the bag"

# --- Runtime settings -------------------------------------------------------
# Index of the CUDA device the encoder runs on (used as f"cuda:{GPU}").
GPU = 0
# Value handed to T5Embedder as `from_pretrained`.
MODEL_PATH = "google/t5-v1_1-xxl"
# YAML file from which config["dataset"]["tokenizer_max_length"] is read.
CONFIG_PATH = "configs/base.yaml"
# Directory that receives the resulting "<TASK_NAME>.pt" file.
SAVE_DIR = "lang_embed/"
| |
|
| | |
| | |
| | |
| |
|
def main():
    """Encode ``INSTRUCTION`` with the T5 text encoder and save the result.

    Steps:
      1. Read ``config["dataset"]["tokenizer_max_length"]`` from the YAML
         file at ``CONFIG_PATH``.
      2. Build a ``T5Embedder`` from ``MODEL_PATH`` on ``cuda:{GPU}``.
      3. Tokenize ``INSTRUCTION`` and run it through the encoder without
         gradient tracking.
      4. Save a dict with the keys ``"name"``, ``"instruction"`` and
         ``"embeddings"`` to ``SAVE_DIR/<TASK_NAME>.pt`` and print a summary.

    Raises:
        OSError: if the config file cannot be read or the output directory
            cannot be created.
        KeyError: if the config lacks ``dataset.tokenizer_max_length``.
    """
    # Create the output directory *before* the expensive model load, so a
    # missing directory cannot make torch.save fail at the very end of the run.
    # An empty SAVE_DIR means "current directory"; os.makedirs("") would raise.
    if SAVE_DIR:
        os.makedirs(SAVE_DIR, exist_ok=True)
    save_path = os.path.join(SAVE_DIR, f"{TASK_NAME}.pt")

    with open(CONFIG_PATH, "r") as fp:
        config = yaml.safe_load(fp)

    device = torch.device(f"cuda:{GPU}")
    text_embedder = T5Embedder(
        from_pretrained=MODEL_PATH,
        model_max_length=config["dataset"]["tokenizer_max_length"],
        device=device,
    )
    tokenizer, text_encoder = text_embedder.tokenizer, text_embedder.model

    tokens = tokenizer(
        INSTRUCTION,
        return_tensors="pt",
        padding="longest",
        truncation=True,
    )["input_ids"].to(device)

    # Force a single-row (1, seq_len) layout for the one instruction.
    tokens = tokens.view(1, -1)
    with torch.no_grad():
        # Move the result to the CPU so the saved file holds a CPU tensor.
        pred = text_encoder(tokens).last_hidden_state.detach().cpu()

    # The embedding is stored together with its task name and source text.
    torch.save(
        {
            "name": TASK_NAME,
            "instruction": INSTRUCTION,
            "embeddings": pred,
        },
        save_path,
    )

    print(f'\"{INSTRUCTION}\" from \"{TASK_NAME}\" is encoded by \"{MODEL_PATH}\" into shape {pred.shape} and saved to \"{save_path}\"')
| |
|
| |
|
# Run the encoding only when this file is executed as a script, so importing
# the module does not trigger it.
if __name__ == "__main__":
    main()
| |
|