{
  "dataset_name": "custom",
  "dataset_config": null,
  "dataset_split": "train",
  "text_column": "text",
  "output_dir": "./output",
  "max_length": 512,
  "batch_size": 4,
  "learning_rate": 0.0002,
  "num_epochs": 4,
  "from_scratch": true,
  "max_samples": null,
  "n_embd": 768,
  "n_layer": 12,
  "n_head": 12,
  "dropout": 0.1,
  "weight_decay": 0.01,
  "max_grad_norm": 1.0,
  "warmup_ratio": 0.1,
  "fp16": false,
  "log_steps": 100,
  "save_steps": 10000,
  "save_epochs": 1,
  "use_wandb": true,
  "wandb_project": "ngram-gpt2-exp3",
  "run_name": "ngram-gpt2-exp4-extended-tokens",
  "num_workers": 8,
  "text_files_dir": "./data"
}