Commit c8d491e1 authored by novelailab

eval harness works

parent 44751bc6
@@ -132,4 +132,6 @@ models
gptjconvert
j6b_vanilla
wandb
*.map
\ No newline at end of file
*.map
pretrained
lm_cache
\ No newline at end of file
from . import gptj

MODEL_MAP = {
    "gptj": (gptj.GPTJModel, gptj.GPTJConfig),
}

def get_model(model_name: str):
    return MODEL_MAP[model_name]
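For reference, a minimal usage sketch of the registry above (get_model is the package-level lookup used by load_from_path later in this commit; the construction step assumes GPTJConfig also carries device/dtype defaults, since GPTJModel.__init__ reads config.device and config.dtype):

from basedformer import get_model

model_class, config_class = get_model("gptj")  # -> (gptj.GPTJModel, gptj.GPTJConfig)
config = config_class()                        # dataclass defaults, see GPTJConfig below
model = model_class(config)                    # assumes device/dtype defaults exist on the config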
@@ -13,7 +13,7 @@ except ImportError:
import os
from pathlib import Path
import math
from basedformer import lm_base
from basedformer import lm_utils
from dataclasses import dataclass
def fixed_pos_embedding(dim=None, seq_len=None, x=None):
@@ -192,6 +192,7 @@ class GPTJLayer(nn.Module):
class GPTJModel(nn.Module):
    def __init__(self, config, **kwargs):
        nn.Module.__init__(self)
        self.config = config
        self.n_layer = config.n_layer
        self.hidden_dim = config.hidden_dim
        self.vocab_embed = nn.Embedding(config.vocab_dim, self.hidden_dim, device=config.device, dtype=config.dtype)
@@ -248,6 +249,7 @@ class GPTJModel(nn.Module):
class GPTJConfig:
    n_layer: int = 6
    n_head: int = 8
    n_tokens: int = 2048
    hidden_dim: int = 512
    vocab_dim: int = 50400
    eps: float = 1e-5
@@ -265,5 +267,5 @@ def load_gpt_j(path="models/6b", state_dict=None):
"eps": 1e-5
}
config = GPTJConfig(**config)
model = lm_base.load(GPTJModel, config, path)
model = lm_utils._load_dict_model(GPTJModel, config, path)
return model
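Loading the pretrained model is then a one-liner, mirroring the test script later in this commit (the default path "models/6b" is assumed to hold the sharded checkpoint):

from basedformer import gptj

model = gptj.load_gpt_j().cuda().half().eval()  # reads from models/6b by default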
from basedformer import utils
import basedformer
import math
import torch
from torch import nn
@@ -35,7 +36,34 @@ def no_init(model_class, config):
    model = utils.no_init(lambda: model_class(config))
    return model
def load(model_class, config, path=None, state_dict=None, strict=False):

def save(model, path):
    try:
        os.mkdir(path)
    except FileExistsError:  # keep going if the directory already exists
        pass

    checkpoint = {}
    for i, (name, tensor) in enumerate(model.state_dict().items()):
        checkpoint[name] = f"{path}/b{i}.pt"
        torch.save(tensor, f"{path}/b{i}.pt")

    torch.save(checkpoint, f"{path}/m.pt")
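save() shards the state dict one tensor per file (b0.pt, b1.pt, ...) and writes a manifest m.pt mapping each state-dict key to its shard path. A minimal sketch of the inverse read, assuming that layout (load_shards is hypothetical; the real loader is _load_dict_model below):

import torch

def load_shards(path):
    checkpoint = torch.load(f"{path}/m.pt")  # key -> shard file, as written by save()
    return {name: torch.load(shard) for name, shard in checkpoint.items()}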
def load_from_path(config_folder=None, strict=False):
    config_folder = Path(config_folder)
    config = _load_config_file(config_folder / "config.json")
    model_class, config_class = basedformer.get_model(config["model_class"])
    model_path = config["model_path"]
    model_config = config_class(**config["model_config"])
    print(model_config)

    if model_path == ".":
        # model_path is the config_folder directory.
        model_path = config_folder

    model_path = Path(model_path) / "lm"
    model = _load_dict_model(model_class, model_config, model_path, strict=strict)
    return model
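load_from_path therefore expects config_folder/config.json with three keys; a hypothetical example, shown as the equivalent Python literal (field values illustrative, not taken from this commit):

config = {
    "model_class": "gptj",  # key into basedformer.MODEL_MAP
    "model_path": ".",      # "." resolves to config_folder; weights are read from <model_path>/lm
    "model_config": {"n_layer": 28, "n_head": 16, "hidden_dim": 4096, "vocab_dim": 50400},
}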
def _load_dict_model(model_class, config, path=None, state_dict=None, strict=False):
    # I am kinda sad that we will not have a load function in lm object itself.
    # might be better to add load functions -- actually nope.
    if path:
@@ -45,13 +73,16 @@ def load(model_class, config, path=None, state_dict=None, strict=False):
    model.load_state_dict(state_dict, strict=strict)
    return model
def save(model, path):
    try:
        os.mkdir(path)
    except FileExistsError:
        pass

    checkpoint = {}
    for i, (name, tensor) in enumerate(model.state_dict().items()):
        checkpoint[name] = f"{path}/b{i}.pt"
        torch.save(tensor, f"{path}/b{i}.pt")

    torch.save(checkpoint, f"{path}/m.pt")
def _load_config_file(config_file):
    if not config_file.exists():
        raise FileNotFoundError(f"Config file not found at {config_file}")

    with open(config_file) as f:
        config = json.load(f)

    return config
@@ -2,7 +2,6 @@ from basedformer import gptj
from basedformer.utils import *
from transformers import AutoTokenizer
from icecream import ic
import functorch
import time
import sys
@@ -190,7 +189,6 @@ def generate(forward, prompt_tokens, tokens_to_generate=50, ops_list=[{"temp": 0
"rep_pen": rep_pen,
}
funcnomial = functorch.vmap(func_multinomial, randomness="different")
for _ in range(tokens_to_generate):
logits, kv = forward(in_tokens, cache=True, kv=kv)
logits = logits[:, -1, :] #get the last token in the seq
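This commit drops the functorch import and the vmap-wrapped multinomial, presumably because torch.multinomial already draws one independent sample per row of a 2-D probability tensor, so the batched case needs no wrapper (a minimal sketch, not the harness code):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 50400)            # (batch, vocab)
probs = F.softmax(logits, dim=-1)
next_token = torch.multinomial(probs, 1)  # (batch, 1): one independent draw per row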
@@ -24,20 +24,25 @@ config_obj.create_service(overwrite=True)
remote = config_obj.get_pyfra_remote()
env1 = remote.env('noname', python_version=None)
path = env1.path('/home/xuser/diffusionstorage/workspace/kuru/basedformer')
env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
env1.sh('pip install einops numpy')
env1.sh('pip install tqdm')
env1.sh('pip install /home/xuser/diffusionstorage/workspace/finetune/pokepls/transformers-repo')
env1.sh('pip3 install einops==0.4.1 pyyaml wandb')
env1.sh('wandb login 21a9442d42a35e15ce421f2b702ec58508b9adc4')
env1.sh('pip3 install dotmap icecream')
path.sh("pip3 install --editable .")
#path.sh("pip3 uninstall torch")
#path.sh("pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113")
if False:
    env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
    env1.sh('pip install einops numpy')
    env1.sh('pip install tqdm')
    env1.sh('pip install /home/xuser/diffusionstorage/workspace/finetune/pokepls/transformers-repo')
    env1.sh('pip3 install einops==0.4.1 pyyaml wandb')
    env1.sh('wandb login 21a9442d42a35e15ce421f2b702ec58508b9adc4')
    env1.sh('pip3 install dotmap icecream')
    path.sh("pip3 install --editable .")
    #path.sh("pip3 uninstall torch")
    #path.sh("pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113")

with always_rerun():
    if bash:
        path.sh("bash")

    if True:
        path.sh("python3 ../lm-evaluation-harness/main.py --model basedformer --batch_size 8 --model_args pretrained=/home/xuser/diffusionstorage/workspace/kuru/basedformer/pretrained/gptj-6b --device 0 --tasks lambada")
        #path.sh("python3 ../lm-evaluation-harness/main.py --batch_size 8")
    else:
        print(f"Running {sys.argv[1]}")
        path.sh(f'python3 {sys.argv[1]}')
from basedformer import gptj
from basedformer.utils import *
import basedformer.lm_utils as lmu
import time
import torch
@@ -8,11 +9,7 @@ import numpy as np
from tqdm import tqdm
from contextlib import contextmanager
import torch.nn.functional as F
from transformers import (
    AutoModelForCausalLM,
    GPTNeoForCausalLM,
    AutoConfig,
)
from transformers import GPTNeoForCausalLM
# replicating the timeit magic function of IPython
def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True):
    precision = 'ns'
@@ -67,10 +64,11 @@ def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True
with torch.no_grad():
    based_model = gptj.load_gpt_j().cuda().half().eval()
    print("Loaded based model")
    hf_model = no_init(lambda: AutoModelForCausalLM.from_pretrained('/home/xuser/models/j6b_ckpt_14001')).cuda().half().eval()
    hf_model = no_init(lambda: GPTNeoForCausalLM.from_pretrained('/home/xuser/models/j6b_ckpt_14001')).cuda().half().eval()
    print("Loaded hf model")

    path = "/home/xuser/diffusionstorage/workspace/kuru/basedformer/pretrained/gptj-6b"
    based_model = lmu.load_from_path(path).cuda().half().eval()
    print("Loaded based model")

    x = torch.randint(0, 50256, (1, 2048)).cuda().long()
    assert torch.allclose(hf_model.transformer.wte(x), based_model.vocab_embed(x))