Commit 94ad5ad7 authored by novelailab

update hypetrain with API changes

parent 5a4b10c7
@@ -12,7 +12,7 @@ import wandb
 import numpy as np
 from torch.utils.checkpoint import checkpoint as ck
 from math import log2, ceil
-from basedformer import optimizer, lm_utils
+from basedformer import optimizer, lm_utils, dataset
 from basedformer.utils import *
 import glob
 from transformers import AutoTokenizer
@@ -191,10 +191,8 @@ def sample(prompt, n_tokens, bsz, hypernetwork=None):
 # we need 250 batch size to train the small GPT.
 train_config = {
     "data_path": "/home/xuser/diffusionstorage/datasets/enwik9-gpt2-2049.map",
-    #"data_path": "/home/xuser/diffusionstorage/datasets/sigurd/map/sigurd_v7_infilling.map",
-    ##"data_path": "/home/xuser/diffusionstorage/datasets/OWT2-gpt2-full.map",
-    #"data_path": "/home/xuser/diffusionstorage/datasets/sigurd/map/sigurd_v5_fs_2049.map",
     "save_path": "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-fairseq-6b-2048-enwik9-again",
+    "lm_path": "/home/xuser/nvme1/pretrained/sigurdv4",
     "do_save": True,
     "run_name": "fairseq-6b-enwik9-6b-postln-bf16-2e-4-4bsz-every5layer",
     "lr": 2e-4,
@@ -215,7 +213,7 @@ gas = train_config["gas"]
 Path(train_config["save_path"]).mkdir(parents=True, exist_ok=True)
 #model = GPTModel.gpt2_init(model_config).cuda().float()
-model = lm_utils.load_from_path("pretrained/fairseq_6_7b").cuda().bfloat16()
+model = lm_utils.load_from_path("/home/xuser/nvme1/pretrained/sigurdv4").cuda().bfloat16()
 for param in model.parameters():
     param.requires_grad = False
@@ -243,7 +241,7 @@ else:
 # TODO: Add load, add evals, add FP16 AMP, and Data Parallel, outputting hidden states from the get_logits function.
 print(opt.curr_step)
-train_dataset = FbDataset(2049, train_config["data_path"])
+train_dataset = dataset.ShardedDataset(2049, train_config["data_path"])
 if last_cp:
     train_dataset.skip = opt.curr_step * bs * gas
@@ -309,8 +307,7 @@ for input_ids, labels in t:
             },
             step=curr_step)
-        if train_config["do_save"]:
-            if curr_step % train_config["save_every"] == 0 and curr_step != 0:
+        if train_config["do_save"] and curr_step % train_config["save_every"] == 0 and curr_step != 0:
                 save_folder = Path(train_config["save_path"]) / f"step_{curr_step}"
                 save_folder.mkdir(parents=True, exist_ok=True)
                 torch.save(hypernetwork.state_dict(), save_folder / "hyper.pt")
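
For context on the FbDataset to dataset.ShardedDataset rename above, a minimal usage sketch follows. It assumes ShardedDataset keeps the (context_length, map_path) constructor and the .skip fast-forward shown in the diff; the DataLoader wrapping and the illustrative bs/gas/curr_step values are assumptions for the sketch, not part of this commit.

    # Sketch: the dataset API as hypetrain uses it after this commit.
    from torch.utils.data import DataLoader
    from basedformer import dataset

    bs, gas, curr_step = 4, 1, 0   # illustrative; the script reads these from train_config and opt

    # FbDataset(2049, path) becomes dataset.ShardedDataset(2049, path);
    # 2049 is the tokens-per-sample length of the pre-tokenized .map file.
    train_dataset = dataset.ShardedDataset(2049, "/home/xuser/diffusionstorage/datasets/enwik9-gpt2-2049.map")

    # On resume, the script skips the samples already consumed.
    train_dataset.skip = curr_step * bs * gas

    # Assumption: the dataset yields (input_ids, labels) pairs and can be wrapped
    # in a plain DataLoader, matching the loop's `for input_ids, labels in t:`.
    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=False)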
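
Similarly, a small sketch of the merged checkpoint guard at the end of the diff: the two nested ifs collapse into one condition, and only the hypernetwork weights are written, since the base LM is loaded with requires_grad disabled and never changes. The maybe_save helper is hypothetical; the script inlines this logic in its training loop.

    # Sketch: checkpoint guard after this commit (maybe_save is a hypothetical wrapper).
    from pathlib import Path
    import torch

    def maybe_save(hypernetwork, train_config, curr_step):
        if train_config["do_save"] and curr_step % train_config["save_every"] == 0 and curr_step != 0:
            save_folder = Path(train_config["save_path"]) / f"step_{curr_step}"
            save_folder.mkdir(parents=True, exist_ok=True)
            # Only the hypernetwork state dict is saved; the frozen base model is not re-saved.
            torch.save(hypernetwork.state_dict(), save_folder / "hyper.pt")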