Commit 94ad5ad7 authored by novelailab

update hypetrain with API changes

parent 5a4b10c7
@@ -12,7 +12,7 @@ import wandb
 import numpy as np
 from torch.utils.checkpoint import checkpoint as ck
 from math import log2, ceil
-from basedformer import optimizer, lm_utils
+from basedformer import optimizer, lm_utils, dataset
 from basedformer.utils import *
 import glob
 from transformers import AutoTokenizer
@@ -191,10 +191,8 @@ def sample(prompt, n_tokens, bsz, hypernetwork=None):
 # we need 250 batch size to train the small GPT.
 train_config = {
     "data_path": "/home/xuser/diffusionstorage/datasets/enwik9-gpt2-2049.map",
-    #"data_path": "/home/xuser/diffusionstorage/datasets/sigurd/map/sigurd_v7_infilling.map",
-    ##"data_path": "/home/xuser/diffusionstorage/datasets/OWT2-gpt2-full.map",
-    #"data_path": "/home/xuser/diffusionstorage/datasets/sigurd/map/sigurd_v5_fs_2049.map",
     "save_path": "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-fairseq-6b-2048-enwik9-again",
+    "lm_path": "/home/xuser/nvme1/pretrained/sigurdv4",
     "do_save": True,
     "run_name": "fairseq-6b-enwik9-6b-postln-bf16-2e-4-4bsz-every5layer",
     "lr": 2e-4,
@@ -215,7 +213,7 @@ gas = train_config["gas"]
 Path(train_config["save_path"]).mkdir(parents=True, exist_ok=True)
 #model = GPTModel.gpt2_init(model_config).cuda().float()
-model = lm_utils.load_from_path("pretrained/fairseq_6_7b").cuda().bfloat16()
+model = lm_utils.load_from_path("/home/xuser/nvme1/pretrained/sigurdv4").cuda().bfloat16()
 for param in model.parameters():
     param.requires_grad = False
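
The freeze loop above is the standard hypernetwork setup: the base LM runs forward in bfloat16 with `requires_grad` off, so autograd only tracks the hypernetwork. A minimal sketch of the pattern, using a plain `torch.optim.AdamW` as a stand-in for the script's own `basedformer.optimizer` wrapper:

```python
import torch

# Freeze the base LM: it only supplies hidden states / logits.
for param in model.parameters():
    param.requires_grad = False

# Only hypernetwork parameters are trainable; hand just those to the optimizer.
trainable = [p for p in hypernetwork.parameters() if p.requires_grad]
opt = torch.optim.AdamW(trainable, lr=train_config["lr"])
```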
@@ -243,7 +241,7 @@ else:
 # TODO: Add load, add evals, add FP16 AMP, and Data Parallel, outputting hidden states from the get_logits function.
 print(opt.curr_step)
-train_dataset = FbDataset(2049, train_config["data_path"])
+train_dataset = dataset.ShardedDataset(2049, train_config["data_path"])
 if last_cp:
     train_dataset.skip = opt.curr_step * bs * gas
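
The `skip` assignment is the resume logic: one optimizer step consumes `bs * gas` sequences, so a run restarted at step `curr_step` must skip past that many samples per step already taken. A worked example with assumed values (the run name suggests a batch size of 4):

```python
# Illustrative numbers only: bs and gas come from train_config at runtime.
curr_step, bs, gas = 1000, 4, 1
skip = curr_step * bs * gas  # 4000 sequences already trained on are skipped
```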
@@ -309,8 +307,7 @@ for input_ids, labels in t:
             },
             step=curr_step)
-    if train_config["do_save"]:
-        if curr_step % train_config["save_every"] == 0 and curr_step != 0:
+    if train_config["do_save"] and curr_step % train_config["save_every"] == 0 and curr_step != 0:
         save_folder = Path(train_config["save_path"]) / f"step_{curr_step}"
         save_folder.mkdir(parents=True, exist_ok=True)
         torch.save(hypernetwork.state_dict(), save_folder / "hyper.pt")
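
Collapsing the nested `if`s into one `and` chain is behavior-preserving: `and` short-circuits left to right exactly as the nesting did, so `save_every` is only consulted when `do_save` is true. A self-contained sketch of the equivalence, with a hypothetical `save()` standing in for the three save lines:

```python
train_config = {"do_save": True, "save_every": 500}  # assumed values
curr_step = 1000

def save(step):
    # Hypothetical stand-in for the mkdir + torch.save block above.
    print(f"saving checkpoint at step {step}")

# Before: nested conditions.
if train_config["do_save"]:
    if curr_step % train_config["save_every"] == 0 and curr_step != 0:
        save(curr_step)

# After: one short-circuiting condition; identical behavior.
if train_config["do_save"] and curr_step % train_config["save_every"] == 0 and curr_step != 0:
    save(curr_step)
```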