Commit c8d491e1 authored by novelailab

eval harness works

parent 44751bc6
@@ -132,4 +132,6 @@ models
gptjconvert
j6b_vanilla
wandb
*.map
\ No newline at end of file
*.map
pretrained
lm_cache
\ No newline at end of file
from . import gptj

MODEL_MAP = {
    "gptj": (gptj.GPTJModel, gptj.GPTJConfig),
}

def get_model(model_name: str):
    return MODEL_MAP[model_name]
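For reference, a minimal usage sketch of the registry above (get_model is the package-level lookup used by load_from_path later in this commit; the construction step assumes GPTJConfig also carries device/dtype defaults, since GPTJModel.__init__ reads config.device and config.dtype):

from basedformer import get_model

model_class, config_class = get_model("gptj")  # -> (gptj.GPTJModel, gptj.GPTJConfig)
config = config_class()                        # dataclass defaults, see GPTJConfig below
model = model_class(config)                    # assumes device/dtype defaults exist on the config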
@@ -13,7 +13,7 @@ except ImportError:
import os
from pathlib import Path
import math
from basedformer import lm_base
from basedformer import lm_utils
from dataclasses import dataclass
def fixed_pos_embedding(dim=None, seq_len=None, x=None):
@@ -192,6 +192,7 @@ class GPTJLayer(nn.Module):
class GPTJModel(nn.Module):
    def __init__(self, config, **kwargs):
        nn.Module.__init__(self)
        self.config = config
        self.n_layer = config.n_layer
        self.hidden_dim = config.hidden_dim
        self.vocab_embed = nn.Embedding(config.vocab_dim, self.hidden_dim, device=config.device, dtype=config.dtype)
@@ -248,6 +249,7 @@ class GPTJModel(nn.Module):
class GPTJConfig:
    n_layer: int = 6
    n_head: int = 8
    n_tokens: int = 2048
    hidden_dim: int = 512
    vocab_dim: int = 50400
    eps: float = 1e-5
@@ -265,5 +267,5 @@ def load_gpt_j(path="models/6b", state_dict=None):
"eps": 1e-5
}
config = GPTJConfig(**config)
model = lm_base.load(GPTJModel, config, path)
model = lm_utils._load_dict_model(GPTJModel, config, path)
return model
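Loading the pretrained model is then a one-liner, mirroring the test script later in this commit (the default path "models/6b" is assumed to hold the sharded checkpoint):

from basedformer import gptj

model = gptj.load_gpt_j().cuda().half().eval()  # reads from models/6b by default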
from basedformer import utils
import basedformer
import math
import torch
from torch import nn
@@ -35,7 +36,34 @@ def no_init(model_class, config):
    model = utils.no_init(lambda: model_class(config))
    return model
def load(model_class, config, path=None, state_dict=None, strict=False):

def save(model, path):
    try:
        os.mkdir(path)
    except FileExistsError:  # keep going if the directory already exists
        pass

    checkpoint = {}
    for i, (name, tensor) in enumerate(model.state_dict().items()):
        checkpoint[name] = f"{path}/b{i}.pt"
        torch.save(tensor, f"{path}/b{i}.pt")

    torch.save(checkpoint, f"{path}/m.pt")
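save() shards the state dict one tensor per file (b0.pt, b1.pt, ...) and writes a manifest m.pt mapping each state-dict key to its shard path. A minimal sketch of the inverse read, assuming that layout (load_shards is hypothetical; the real loader is _load_dict_model below):

import torch

def load_shards(path):
    checkpoint = torch.load(f"{path}/m.pt")  # key -> shard file, as written by save()
    return {name: torch.load(shard) for name, shard in checkpoint.items()}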
def load_from_path(config_folder=None, strict=False):
    config_folder = Path(config_folder)
    config = _load_config_file(config_folder / "config.json")
    model_class, config_class = basedformer.get_model(config["model_class"])
    model_path = config["model_path"]
    model_config = config_class(**config["model_config"])
    print(model_config)

    if model_path == ".":
        # model_path is the config_folder directory.
        model_path = config_folder

    model_path = Path(model_path) / "lm"
    model = _load_dict_model(model_class, model_config, model_path, strict=strict)
    return model
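load_from_path therefore expects config_folder/config.json with three keys; a hypothetical example, shown as the equivalent Python literal (field values illustrative, not taken from this commit):

config = {
    "model_class": "gptj",  # key into basedformer.MODEL_MAP
    "model_path": ".",      # "." resolves to config_folder; weights are read from <model_path>/lm
    "model_config": {"n_layer": 28, "n_head": 16, "hidden_dim": 4096, "vocab_dim": 50400},
}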
def _load_dict_model(model_class, config, path=None, state_dict=None, strict=False):
    # I am kinda sad that we will not have a load function in lm object itself.
    # might be better to add load functions -- actually nope.
    if path:
@@ -45,13 +73,16 @@ def load(model_class, config, path=None, state_dict=None, strict=False):
    model.load_state_dict(state_dict, strict=strict)
    return model
def save(model, path):
    try:
        os.mkdir(path)
    except FileExistsError:
        pass

    checkpoint = {}
    for i, (name, tensor) in enumerate(model.state_dict().items()):
        checkpoint[name] = f"{path}/b{i}.pt"
        torch.save(tensor, f"{path}/b{i}.pt")

    torch.save(checkpoint, f"{path}/m.pt")
def _load_config_file(config_file):
    if not config_file.exists():
        raise FileNotFoundError(f"Config file not found at {config_file}")

    with open(config_file) as f:
        config = json.load(f)

    return config
@@ -2,7 +2,6 @@ from basedformer import gptj
from basedformer.utils import *
from transformers import AutoTokenizer
from icecream import ic
import functorch
import time
import sys
@@ -190,7 +189,6 @@ def generate(forward, prompt_tokens, tokens_to_generate=50, ops_list=[{"temp": 0
"rep_pen": rep_pen,
}
funcnomial = functorch.vmap(func_multinomial, randomness="different")
for _ in range(tokens_to_generate):
logits, kv = forward(in_tokens, cache=True, kv=kv)
logits = logits[:, -1, :] #get the last token in the seq
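This commit drops the functorch import and the vmap-wrapped multinomial, presumably because torch.multinomial already draws one independent sample per row of a 2-D probability tensor, so the batched case needs no wrapper (a minimal sketch, not the harness code):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 50400)            # (batch, vocab)
probs = F.softmax(logits, dim=-1)
next_token = torch.multinomial(probs, 1)  # (batch, 1): one independent draw per row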
@@ -24,20 +24,25 @@ config_obj.create_service(overwrite=True)
remote = config_obj.get_pyfra_remote()
env1 = remote.env('noname', python_version=None)
path = env1.path('/home/xuser/diffusionstorage/workspace/kuru/basedformer')
env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
env1.sh('pip install einops numpy')
env1.sh('pip install tqdm')
env1.sh('pip install /home/xuser/diffusionstorage/workspace/finetune/pokepls/transformers-repo')
env1.sh('pip3 install einops==0.4.1 pyyaml wandb')
env1.sh('wandb login 21a9442d42a35e15ce421f2b702ec58508b9adc4')
env1.sh('pip3 install dotmap icecream')
path.sh("pip3 install --editable .")
#path.sh("pip3 uninstall torch")
#path.sh("pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113")
if False:
    env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
    env1.sh('pip install einops numpy')
    env1.sh('pip install tqdm')
    env1.sh('pip install /home/xuser/diffusionstorage/workspace/finetune/pokepls/transformers-repo')
    env1.sh('pip3 install einops==0.4.1 pyyaml wandb')
    env1.sh('wandb login 21a9442d42a35e15ce421f2b702ec58508b9adc4')
    env1.sh('pip3 install dotmap icecream')
    path.sh("pip3 install --editable .")
    #path.sh("pip3 uninstall torch")
    #path.sh("pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113")

with always_rerun():
    if bash:
        path.sh("bash")

    if True:
        path.sh("python3 ../lm-evaluation-harness/main.py --model basedformer --batch_size 8 --model_args pretrained=/home/xuser/diffusionstorage/workspace/kuru/basedformer/pretrained/gptj-6b --device 0 --tasks lambada")
        #path.sh("python3 ../lm-evaluation-harness/main.py --batch_size 8")
    else:
        print(f"Running {sys.argv[1]}")
        path.sh(f'python3 {sys.argv[1]}')
from basedformer import gptj
from basedformer.utils import *
import basedformer.lm_utils as lmu
import time
import torch
@@ -8,11 +9,7 @@ import numpy as np
from tqdm import tqdm
from contextlib import contextmanager
import torch.nn.functional as F
from transformers import (
    AutoModelForCausalLM,
    GPTNeoForCausalLM,
    AutoConfig,
)
from transformers import GPTNeoForCausalLM
# replicating the timeit magic function of IPython
def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True):
    precision = 'ns'
@@ -67,10 +64,11 @@ def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True
with torch.no_grad():
    based_model = gptj.load_gpt_j().cuda().half().eval()
    print("Loaded based model")
    hf_model = no_init(lambda: AutoModelForCausalLM.from_pretrained('/home/xuser/models/j6b_ckpt_14001')).cuda().half().eval()
    hf_model = no_init(lambda: GPTNeoForCausalLM.from_pretrained('/home/xuser/models/j6b_ckpt_14001')).cuda().half().eval()
    print("Loaded hf model")

    path = "/home/xuser/diffusionstorage/workspace/kuru/basedformer/pretrained/gptj-6b"
    based_model = lmu.load_from_path(path).cuda().half().eval()
    print("Loaded based model")

    x = torch.randint(0, 50256, (1, 2048)).cuda().long()
    assert torch.allclose(hf_model.transformer.wte(x), based_model.vocab_embed(x))