{ "dataset_name": "custom", "dataset_config": null, "dataset_split": "train", "text_column": "text", "output_dir": "./output", "max_length": 512, "batch_size": 4, "learning_rate": 0.0002, "num_epochs": 4, "from_scratch": true, "max_samples": null, "n_embd": 768, "n_layer": 12, "n_head": 12, "dropout": 0.1, "weight_decay": 0.01, "max_grad_norm": 1.0, "warmup_ratio": 0.1, "fp16": false, "log_steps": 100, "save_steps": 10000, "save_epochs": 1, "use_wandb": true, "wandb_project": "ngram-gpt2-exp3", "run_name": "ngram-gpt2-exp4-extended-tokens", "num_workers": 8, "text_files_dir": "./data" }