Commit 7a721f81 authored by novelailab

update

parent 7eebf8ad
@@ -127,3 +127,7 @@ dmypy.json
 # Pyre type checker
 .pyre/
+models
+gptjconvert
+j6b_vanilla
from main import *
import time
from time import perf_counter, perf_counter_ns
import numpy as np
from tqdm import tqdm
from contextlib import contextmanager
from transformers import (
    AutoModelForCausalLM,
    GPTNeoForCausalLM,
    AutoConfig,
)

# Replicates IPython's %timeit magic: r runs of n loops each,
# reporting the best run as mean ± std over its loops.
def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True):
    precision = 'ns'
    r_arr = np.empty([2, r])  # [0] = mean, [1] = std
    if function:
        # borrow a readable name when func is an anonymous lambda
        func.__name__ = function.__name__
    for i in tqdm(range(r)) if do_tqdm else range(r):
        n_arr = np.empty(n)
        for k in range(n):
            start = perf_counter_ns()
            func()
            n_arr[k] = perf_counter_ns() - start
        if not first:
            # drop the first measurement to exclude warm-up effects
            n_arr = np.delete(n_arr, 0)
        r_arr[0, i] = np.mean(n_arr)
        r_arr[1, i] = np.std(n_arr)
    best = r_arr[:, np.argmin(r_arr[0])]  # [0] = mean, [1] = std
    # scale the best run from nanoseconds into a human-readable unit
    if best[0] < 1e3:
        precision = 'ns'
    elif best[0] >= 1e9:
        best[0] = best[0] * 1e-9
        best[1] = best[1] * 1e-9
        precision = 's'
    elif best[0] >= 1e6:
        best[0] = best[0] * 1e-6
        best[1] = best[1] * 1e-6
        precision = 'ms'
    elif best[0] >= 1e3:
        best[0] = best[0] * 1e-3
        best[1] = best[1] * 1e-3
        precision = 'μs'
    if not quiet:
        if precision == 'ns':
            print(f"{func.__name__}: {best[0]:.0f}{precision} ± {best[1]:.0f}{precision} per loop (mean ± std. dev. of {r} runs, {n} loops each)")
        elif precision == 'μs':
            print(f"{func.__name__}: {best[0]:.2f}{precision} ± {best[1]:.2f}{precision} per loop (mean ± std. dev. of {r} runs, {n} loops each)")
        elif precision == 'ms':
            print(f"{func.__name__}: {best[0]:.2f}{precision} ± {best[1]:.2f}{precision} per loop (mean ± std. dev. of {r} runs, {n} loops each)")
        elif precision == 's':
            print(f"{func.__name__}: {best[0]:.4f}{precision} ± {best[1]:.4f}{precision} per loop (mean ± std. dev. of {r} runs, {n} loops each)")

# Sanity check: compare our GPT-J implementation against the HF checkpoint
# on identical input. torch, no_init, and load_gpt_j come in via `from main import *`.
with torch.no_grad():
    model = load_gpt_j().cuda().half().eval()
    x = torch.zeros(1, 2048).cuda().long()
    our = model(x)
    print(our.shape)
    del model
    model = no_init(lambda: AutoModelForCausalLM.from_pretrained('/home/xuser/models/j6b_ckpt_14001')).cuda().half().eval()
    hf = model(x, output_hidden_states=True)["hidden_states"][-1]
    print(our[0, 2047, 1000:1020])
    print(hf[0, 2047, 1000:1020])
    print(hf.shape)
\ No newline at end of file
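For reference, a minimal usage sketch of the timeit helper above; bench_fn is a hypothetical stand-in, and the `function` argument only lends a readable name to an anonymous lambda:

    import torch

    def bench_fn():
        return torch.zeros(1024, 1024).sum()

    timeit(bench_fn, r=3, n=5)  # 3 runs of 5 loops; best run reported as mean ± std
    timeit(lambda: bench_fn(), r=3, n=5, function=bench_fn)  # lambda, named via `function`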
@@ -159,7 +159,6 @@ class SelfAttention(nn.Module):
         self.q_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
         self.out_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
         self.rotary_dim = self.head_dim
-        # TODO: handle rotary
         sin, cos = fixed_pos_embedding(dim=self.rotary_dim, seq_len=max_positions)
         self.register_buffer("sin", sin)
         self.register_buffer("cos", cos)
@@ -191,7 +190,7 @@ class SelfAttention(nn.Module):
         x = _merge_heads(x, self.n_head, self.head_dim)
         x = self.out_proj(x)
-        return x  # a, present, (attentions)
+        return x

 class FeedForward(nn.Module):
     def __init__(self, dim=768, hidden_dim=768*4, activation=nn.GELU, device="cuda", dtype=torch.float16):
@@ -278,7 +277,7 @@ class GPTLM(nn.Module):
     def forward(self, x):
         return

-def load_gpt_j(state_dict=None, path=None):
+def load_gpt_j(path="models/6b", state_dict=None):
     config = {
         "n_layer": 28,
         "n_head": 16,
@@ -288,8 +287,7 @@ def load_gpt_j(path="models/6b", state_dict=None):
         "activation": gelu_new,
         "Layer": GPTLayer
     }
-
-    model = GPTModel.load(config, path=path)
+    model = GPTModel.load(config, path, state_dict)
     return model

 def init_6b():
@@ -302,7 +300,7 @@ def init_6b():
         "activation": gelu_new,
         "Layer": GPTLayer
     }
-    model = GPTModel(**config)
+    model = GPTModel.init(config)
     return model

 def init_125m():
@@ -331,4 +329,4 @@ def init_1_3b():
     }
     model = GPTModel(**config)
     return model
\ No newline at end of file
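With the new default path, a checkpoint can be loaded without arguments; a minimal sketch, assuming a GPT-J checkpoint has already been converted into models/6b (as the test script above relies on):

    from main import load_gpt_j

    model = load_gpt_j().cuda().half().eval()  # uses the new default path="models/6b"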
@@ -71,11 +71,13 @@ with torch.no_grad():
         timeit(lambda: module(torch.zeros((1, 1000)).long().cuda()), n=20, first=False)
     '''
-    module = torch.jit.trace(model, torch.zeros((1, 2048)).long().cuda())
-    torch.jit.optimize_for_inference(module)
     static_input = torch.zeros((1, 2048), device='cuda').long()
     static_out = torch.randn((1, 2048, 2048), device='cuda').half()
-    timeit(lambda: module(static_input), n=20, first=False)
+    timeit(lambda: model(static_input), n=1000, first=False)
+    module = torch.jit.trace(model, torch.zeros((1, 2048)).long().cuda())
+    torch.jit.optimize_for_inference(module)
+    timeit(lambda: module(static_input), n=1000, first=False)
     s = torch.cuda.Stream()
     s.wait_stream(torch.cuda.current_stream())
     with torch.cuda.stream(s):
...
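The side-stream warm-up this hunk ends on is the usual prologue for CUDA graph capture in PyTorch 1.10+. The continuation is collapsed in this diff, so the following is only a sketch of how such a capture typically completes, reusing model and static_input from above; next_input is a hypothetical fresh batch:

    # Capture one forward pass into a CUDA graph, then replay it with new data.
    g = torch.cuda.CUDAGraph()
    with torch.cuda.graph(g):
        static_out = model(static_input)
    static_input.copy_(next_input)  # copy fresh tokens into the captured input buffer
    g.replay()                      # static_out is updated in place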
@@ -10,7 +10,6 @@ config_obj.set_name(name)
 config_obj.set_gpu(gpu_name=GPU.RTX_A5000, amount=1)
 config_obj.set_ram(16)
 config_obj.set_cpu(4)
-#config_obj.set_cpu_only()
 config_obj.dry_run(dry)
 config_obj.print_information()
 #config_obj.create_deployment(overwrite=True)
@@ -26,5 +25,7 @@ path = env1.path('/home/xuser/diffusionstorage/workspace/kuru/basedformer')
 env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
 env1.sh('pip install einops numpy')
 env1.sh('pip install tqdm')
+env1.sh('pip install /home/xuser/diffusionstorage/workspace/finetune/pokepls/transformers-repo')
 with always_rerun():
-    path.sh(f'python3 test.py')
\ No newline at end of file
+    print(f"Running {sys.argv[1]}")
+    path.sh(f'python3 {sys.argv[1]}')
\ No newline at end of file