Commit fd387a42 authored by novelailab

update

parent 482a7bae
@@ -31,7 +31,8 @@ def init(model_class, config):
     init_weights(model, config["n_layer"])
     return model
 
-def no_init(model_class, config):
+def no_init(config):
+    model_class = models.get_model(config["model_class"])
     model = utils.no_init(lambda: model_class(config))
     return model
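`models.get_model` does not appear elsewhere in this diff, so its behavior is an assumption: presumably it resolves the `config["model_class"]` string to a model class, so callers no longer pass the class explicitly. A minimal registry-style sketch of that idea (names hypothetical, not the actual basedformer code):

```python
# Hypothetical sketch of a string -> class registry behind models.get_model;
# not the actual basedformer implementation.
_MODEL_REGISTRY = {}

def register_model(name):
    def decorator(cls):
        _MODEL_REGISTRY[name] = cls
        return cls
    return decorator

def get_model(name):
    # e.g. get_model(config["model_class"]) inside no_init(config)
    return _MODEL_REGISTRY[name]
```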
@@ -133,7 +133,7 @@ class SelfAttention(nn.Module):
         x = self.out_proj(x)
         if cache:
-            return x, (key, value)
+            return x, [key, value]
         else:
             return x, None
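The cached key/value container changes from a tuple to a list here. The commit does not say why; one plausible motivation is that a list can be mutated in place when the caller extends the cache at each decoding step, whereas a tuple would have to be rebuilt. A sketch of that pattern, assuming `[batch, heads, seq, head_dim]` cache shapes (not code from this repo):

```python
import torch

def extend_kv_cache(layer_cache, new_key, new_value):
    # Concatenate this step's key/value onto the cached ones along the
    # sequence axis and write back into the (mutable) list in place.
    layer_cache[0] = torch.cat([layer_cache[0], new_key], dim=2)
    layer_cache[1] = torch.cat([layer_cache[1], new_value], dim=2)
```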
@@ -15,7 +15,10 @@ from math import log2, ceil
 from basedformer import gptj, optimizer, lm_utils
 from basedformer.utils import *
 import glob
 from transformers import AutoTokenizer
+from basedformer import sampling
+from icecream import ic
+from termcolor import colored
 
 def _init_weights(module):
     if isinstance(module, nn.Linear):
@@ -101,7 +104,7 @@ class HyperNetwork(nn.Module):
         embed_dim = config["hidden_dim"]
         self.linear = nn.Linear(embed_dim, embed_dim//4, bias=True)
         self.linear2 = nn.Linear(embed_dim//4, embed_dim, bias=True)
-        self.activation = gelu_new
+        self.activation = torch.nn.functional.gelu
         self.num_shifts = ceil(log2(2048)) - 1
         #self.linear.weight.data.normal_(mean=0.0, std=0.02)
         for module in self.modules():
@@ -147,15 +150,53 @@ class HyperNetworkSingle(nn.Module):
         x = x.mul(torch.sigmoid(x))
         return x.bfloat16()
 
+tokenizer = AutoTokenizer.from_pretrained('gpt2')
+
+@torch.no_grad()
+def sample(prompt, n_tokens, bsz, hypernetwork=None):
+    torch.seed()
+    tokens = tokenizer.encode(prompt)
+    #print("Prompt:")
+    #for x in range(len(tokens)):
+    #    print(tokenizer.decode([tokens[x]]), end=" | ")
+    tokens = torch.LongTensor(tokens).unsqueeze(0).cuda()
+    tokens = [tokens] * bsz
+    tokens = torch.cat(tokens, dim=0)
+    rep_pen = {
+        "penalty": 3,
+    }
+    ops = {
+        "rep_pen": rep_pen,
+        "tfs": 0.8,
+        "temp": 0.8,
+    }
+    ops_list = [ops] * bsz
+    tokens_generated = sampling.generate(model.forward, tokens, n_tokens, ops_list=ops_list, hypernetwork=hypernetwork, non_deterministic=True)
+    vanilla_tokens_generated = sampling.generate(model.forward, tokens, n_tokens, ops_list=ops_list, hypernetwork=None)
+    tokens_generated = tokenizer.batch_decode(tokens_generated.cpu().numpy())
+    vanilla_tokens_generated = tokenizer.batch_decode(vanilla_tokens_generated.cpu().numpy())
+
+    ### send to wandb
+    columns = ["Prompt", "Generated Text", "Vanilla Model"]
+    data = []
+    for x in range(len(tokens_generated)):
+        data.append([prompt, str(tokens_generated[x]), str(vanilla_tokens_generated[x])])
+
+    for gen in tokens_generated:
+        print(colored("==========================================================", "red"))
+        print(colored(gen, "green"))
+        print(colored("==========================================================", "red"))
+
+    wandb.log({"Generations": wandb.Table(data=data, columns=columns)})
+
 # we need 250 batch size to train the small GPT.
 train_config = {
-    #"data_path": "/home/xuser/diffusionstorage/datasets/enwik9-gpt2-2049.map",
-    "data_path": "/home/xuser/diffusionstorage/datasets/sigurd/map/sigurd_v7_infilling.map",
-    #"data_path": "/home/xuser/diffusionstorage/datasets/OWT2-gpt2-full.map",
+    "data_path": "/home/xuser/diffusionstorage/datasets/enwik9-gpt2-2049.map",
+    #"data_path": "/home/xuser/diffusionstorage/datasets/sigurd/map/sigurd_v7_infilling.map",
+    ##"data_path": "/home/xuser/diffusionstorage/datasets/OWT2-gpt2-full.map",
     #"data_path": "/home/xuser/diffusionstorage/datasets/sigurd/map/sigurd_v5_fs_2049.map",
-    "save_path": "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-gptj-2048-infilling",
+    "save_path": "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-gptj-2048-enwik9-again",
     "do_save": True,
-    "run_name": "gpt-j-6b-2e-4-infilling",
+    "run_name": "gpt-j-enwik9-6b-postln-bf16-2e-4-4bsz-every5layer",
     "lr": 2e-4,
     "end_lr": 2e-4,
     "warmup_steps": 50,
@@ -165,6 +206,7 @@ train_config = {
     "save_every": 300,
     "amp": False,
     "loss_scale": False,
+    "eval_every": 100,
 }
 torch.manual_seed(train_config["seed"])
 bs = train_config["bs"]
@@ -217,6 +259,7 @@ t = tqdm(train_loader, initial=curr_step)
 scaler = torch.cuda.amp.GradScaler()
 
+sample("<|endoftext|>", 500, 3, hypernetwork=hypernetwork)
 for input_ids, labels in t:
     timex = time.perf_counter()
     input_ids = input_ids.cuda()
@@ -273,5 +316,8 @@ for input_ids, labels in t:
         torch.save(hypernetwork.state_dict(), save_folder / "hyper.pt")
         opt.save(save_folder / "opt")
         print(f"Saved model at step {curr_step}")
 
+    if curr_step % train_config["eval_every"] == 0:
+        sample("<|endoftext|>", 500, 3, hypernetwork=hypernetwork)
+
     curr_step += 1
\ No newline at end of file
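For orientation only: stitching together the fragments visible in this file's diff (the bottleneck linears, the swap of `gelu_new` for `torch.nn.functional.gelu`, and the SiLU-gated bfloat16 output of `HyperNetworkSingle.forward`), the trained hypernetwork appears to be a small bottleneck MLP over hidden states. A reconstruction under those assumptions; the actual classes in the repo may differ:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class BottleneckHyperNetwork(nn.Module):
    """Sketch assembled from the diff fragments above; not the actual class."""
    def __init__(self, config):
        super().__init__()
        embed_dim = config["hidden_dim"]
        self.linear = nn.Linear(embed_dim, embed_dim // 4, bias=True)   # down-project
        self.linear2 = nn.Linear(embed_dim // 4, embed_dim, bias=True)  # up-project
        self.activation = F.gelu

    def forward(self, x):
        x = self.linear2(self.activation(self.linear(x)))
        x = x.mul(torch.sigmoid(x))  # SiLU gate, as in the visible forward()
        return x.bfloat16()
```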
@@ -13,7 +13,7 @@ bash = False
 config_obj = KubeConfig()
 config_obj.set_name(name)
-config_obj.set_gpu(gpu_name=GPU.A100_NVLINK, amount=1)
+config_obj.set_gpu(gpu_name=GPU.A100_PCIE_80GB, amount=1)
 config_obj.set_ram(24)
 config_obj.set_cpu(4)
 config_obj.dry_run(dry)
@@ -36,6 +36,8 @@ if True:
     env1.sh('wandb login 21a9442d42a35e15ce421f2b702ec58508b9adc4')
     env1.sh('pip3 install dotmap icecream')
     path.sh("pip3 install --editable .")
+    path.sh("pip3 install transformers")
+    path.sh("pip3 install termcolor")
 
 with always_rerun():
     if False:
         #env1.sh('pip3 install transformers')
@@ -9,23 +9,26 @@ from transformers import AutoTokenizer
 from icecream import ic
 import time
 import sys
+from termcolor import colored
 
 def main():
     #save_path = "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-gptj-2048-enwik9-bs4-2e-4-catchup"
-    save_path = "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-gptj-2048-infilling"
-    cp_list = sorted(os.listdir(save_path), key=lambda x: int(x.split("_")[-1]))
-    last_cp = Path(save_path) / cp_list[-1] if len(cp_list) > 0 else None
+    save_path = "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-gptj-2048-enwik9-bs4-2e-4-catchup/step_1200"
+    #save_path = "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-gptj-2048-enwik9-again/step_1200"
+    #save_path = "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-gptj-2048-infilling"
+    #cp_list = sorted(os.listdir(save_path), key=lambda x: int(x.split("_")[-1]))
+    #last_cp = Path(save_path) / cp_list[-1] if len(cp_list) > 0 else None
+    last_cp = Path(save_path)
     print(last_cp)
     bsz = 1
-    gen_len = 400
+    gen_len = 1000
     #torch.manual_seed(69)
 
     tokenizer = AutoTokenizer.from_pretrained('gpt2')
     mask = "████████"
     prompt = "You hated the elves enough that if you seen one of them in the forest you would just slice their throats."
     prompt = """'''Kurumuz''' is the founder of tech company [["""
-    promptnomask = f"""The room was lit now by a dozen candles. The door had been locked, and the windows barred; but there were still some faint glimmers of moonlight on the floor outside. For a moment the figure stood motionless in its doorway to look about it with an air of keen and nervous expectancy. Then he came forward into the chamber and moved, where he remained standing for an instant upon his toes like one listening intently before starting to rummage among the books and papers. He selected a large volume from among them and turned back to the window,{mask} holding it between himself and the rest of the room until he could feel the warm breath of the night creeping through the curtains.{mask}"""
-    prompt = f"""The room was lit now by a dozen candles. The door had been locked, and the windows barred; but there were still some faint glimmers of moonlight on the floor outside. For a moment the figure stood motionless in its doorway to look about it with an air of keen and nervous expectancy. Then he came forward into the chamber and moved{mask}, where he remained standing for an instant upon his toes like one listening intently before starting to rummage among the books and papers. He selected a large volume from among them and turned back to the window, holding it between himself and the rest of the room until he could feel the warm breath of the night creeping through the curtains.{mask}"""
-    tokens = tokenizer.encode(promptnomask)
+    #promptnomask = f"""The room was lit now by a dozen candles. The door had been locked, and the windows barred; but there were still some faint glimmers of moonlight on the floor outside. For a moment the figure stood motionless in its doorway to look about it with an air of keen and nervous expectancy. Then he came forward into the chamber and moved, where he remained standing for an instant upon his toes like one listening intently before starting to rummage among the books and papers. He selected a large volume from among them and turned back to the window,{mask} holding it between himself and the rest of the room until he could feel the warm breath of the night creeping through the curtains.{mask}"""
+    #prompt = f"""The room was lit now by a dozen candles. The door had been locked, and the windows barred; but there were still some faint glimmers of moonlight on the floor outside. For a moment the figure stood motionless in its doorway to look about it with an air of keen and nervous expectancy. Then he came forward into the chamber and moved{mask}, where he remained standing for an instant upon his toes like one listening intently before starting to rummage among the books and papers. He selected a large volume from among them and turned back to the window, holding it between himself and the rest of the room until he could feel the warm breath of the night creeping through the curtains.{mask}"""
+    tokens = tokenizer.encode(prompt)
     print(tokens)
     print("Prompt:")
     for x in range(len(tokens)):
@@ -38,7 +41,7 @@ def main():
     t = time.perf_counter()
     model = lmu.load_from_path('pretrained/gptj-6b').cuda().bfloat16().eval()
     hypernetwork = hypernet.HyperNetworkSingle(model.config).cuda().float()
-    print("Loading from step {}".format(cp_list[-1].split("_")[-1]))
+    #print("Loading from step {}".format(cp_list[-1].split("_")[-1]))
     hypernetwork.load_state_dict(torch.load(last_cp / "hyper.pt"))
     ic(time.perf_counter() - t)
@@ -53,19 +56,18 @@ def main():
         "temp": 0.8,
     }
     ops_list = [ops] * bsz
-    #tokens_generated = sampling.generate(model.forward, tokens, gen_len, ops_list=ops_list, hypernetwork=hypernetwork)
-    tokens_generated = sampling.generate_greedy(model.forward, tokens, gen_len, hypernetwork=hypernetwork)
+    torch.manual_seed(69)
+    tokens_generated = sampling.generate(model.forward, tokens, gen_len, ops_list=ops_list, hypernetwork=hypernetwork, non_deterministic=False)
+    #tokens_generated = sampling.generate_greedy(model.forward, tokens, gen_len, hypernetwork=hypernetwork)
     #tokens_generated_batched = generate_real_batched(model.forward, tokens, gen_len, ops=ops)
-    print(tokens_generated.shape)
-    tokens_generated[tokens_generated == 48585] = 35625
-    ic(prompt)
+    #print(tokens_generated.shape)
+    #tokens_generated[tokens_generated == 48585] = 35625
+    #ic(prompt)
     tokens_generated = tokenizer.batch_decode(tokens_generated.cpu().numpy())
     for gen in tokens_generated:
-        print(str(gen.split("*****")[0]))
-        print("++++++++++++")
-        print(str(gen.split("*****")[1]))
-        print("===========================================================")
+        print(colored("==========================================================", "red"))
+        print(colored(gen, "green"))
+        print(colored("==========================================================", "red"))
     #ic(tokenizer.batch_decode(tokens_generated_batched.cpu().numpy()))
     #timeit(lambda: generate(model.forward, tokens, 30, ops_list=ops_list), n=30)
     #timeit(lambda: generate_real_batched(model.forward, tokens, 30, ops=ops), n=30)
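One detail in the evaluation change above: `torch.manual_seed(69)` plus `non_deterministic=False` fixes the sampling noise, making runs reproducible. The script seeds once before its single `sampling.generate` call; reseeding before a second call, as sketched below, would extend this to a token-for-token A/B comparison between the hypernetwork and the vanilla model. This is a sketch using the `sampling.generate` signature shown above, not code from the repo:

```python
import torch
from basedformer import sampling

def seeded_ab_compare(model, tokens, gen_len, ops_list, hypernetwork, seed=69):
    # Same seed before each call -> identical sampling randomness for both runs.
    torch.manual_seed(seed)
    with_hyper = sampling.generate(model.forward, tokens, gen_len, ops_list=ops_list,
                                   hypernetwork=hypernetwork, non_deterministic=False)
    torch.manual_seed(seed)
    vanilla = sampling.generate(model.forward, tokens, gen_len, ops_list=ops_list,
                                hypernetwork=None, non_deterministic=False)
    return with_hyper, vanilla
```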