Shamima
/

ngram-gpt2-exp4-intermediate

Model card Files Files and versions

ngram-gpt2-exp4-intermediate / config.json

Shamima's picture

upload trained models

7e5e9e3 verified 6 months ago

history blame contribute delete

647 Bytes

	{
	"dataset_name": "custom",
	"dataset_config": null,
	"dataset_split": "train",
	"text_column": "text",
	"output_dir": "./output",
	"max_length": 512,
	"batch_size": 4,
	"learning_rate": 0.0002,
	"num_epochs": 4,
	"from_scratch": true,
	"max_samples": null,
	"n_embd": 768,
	"n_layer": 12,
	"n_head": 12,
	"dropout": 0.1,
	"weight_decay": 0.01,
	"max_grad_norm": 1.0,
	"warmup_ratio": 0.1,
	"fp16": false,
	"log_steps": 100,
	"save_steps": 10000,
	"save_epochs": 1,
	"use_wandb": true,
	"wandb_project": "ngram-gpt2-exp3",
	"run_name": "ngram-gpt2-exp4-extended-tokens",
	"num_workers": 8,
	"text_files_dir": "./data"
	}