{
  "dataset_name": "custom",
  "dataset_config": null,
  "dataset_split": "train",
  "text_column": "text",
  "output_dir": "./output",
  "max_length": 512,
  "batch_size": 4,
  "learning_rate": 0.0002,
  "num_epochs": 4,
  "from_scratch": true,
  "max_samples": null,
  "n_embd": 768,
  "n_layer": 12,
  "n_head": 12,
  "dropout": 0.1,
  "weight_decay": 0.01,
  "max_grad_norm": 1.0,
  "warmup_ratio": 0.1,
  "fp16": false,
  "log_steps": 100,
  "save_steps": 10000,
  "save_epochs": 1,
  "use_wandb": true,
  "wandb_project": "ngram-gpt2-exp3",
  "run_name": "ngram-gpt2-exp4-extended-tokens",
  "num_workers": 8,
  "text_files_dir": "./data"
}