Commit 7a721f81 authored by novelailab

update

parent 7eebf8ad
@@ -127,3 +127,7 @@ dmypy.json
# Pyre type checker
.pyre/
models
gptjconvert
j6b_vanilla
from main import *
import torch
import numpy as np
from time import perf_counter_ns
from tqdm import tqdm
from transformers import (
    AutoModelForCausalLM,
    GPTNeoForCausalLM,
    AutoConfig,
)
# Replicates IPython's %timeit magic: r runs of n loops each, reporting the
# best run as mean ± std. dev. per loop.
def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True):
    r_arr = np.empty([2, r])  # [0] = mean, [1] = std
    if function:
        func.__name__ = function.__name__
    for i in tqdm(range(r)) if do_tqdm else range(r):
        n_arr = np.empty(n)
        for k in range(n):
            start = perf_counter_ns()
            func()
            n_arr[k] = perf_counter_ns() - start
        if not first:
            # drop the first loop so one-time warmup doesn't skew the stats
            n_arr = np.delete(n_arr, 0)
        r_arr[0, i] = np.mean(n_arr)
        r_arr[1, i] = np.std(n_arr)
    best = r_arr[:, np.argmin(r_arr[0])]  # [0] = mean, [1] = std
    # scale the nanosecond timings to the most readable unit
    if best[0] >= 1e9:
        best *= 1e-9
        precision = 's'
    elif best[0] >= 1e6:
        best *= 1e-6
        precision = 'ms'
    elif best[0] >= 1e3:
        best *= 1e-3
        precision = 'μs'
    else:
        precision = 'ns'
    if not quiet:
        fmt = {'ns': '.0f', 'μs': '.2f', 'ms': '.2f', 's': '.4f'}[precision]
        print(f"{func.__name__}: {best[0]:{fmt}}{precision} ± {best[1]:{fmt}}{precision} "
              f"per loop (mean ± std. dev. of {r} runs, {n} loops each)")
# Sanity check: compare our GPT-J implementation against the HF checkpoint's
# final hidden states on the same input.
with torch.no_grad():
    model = load_gpt_j().cuda().half().eval()
    x = torch.zeros(1, 2048).cuda().long()
    our = model(x)
    print(our.shape)
    del model

    model = no_init(lambda: AutoModelForCausalLM.from_pretrained('/home/xuser/models/j6b_ckpt_14001')).cuda().half().eval()
    hf = model(x, output_hidden_states=True)["hidden_states"][-1]
    print(our[0, 2047, 1000:1020])
    print(hf[0, 2047, 1000:1020])
    print(hf.shape)
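    # A tolerance check is sturdier than eyeballing slices (a sketch; the fp16
    # tolerances are assumptions, tune as needed):
    # print(torch.allclose(our, hf, atol=1e-2, rtol=1e-2))
    # print((our - hf).abs().max())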
\ No newline at end of file
@@ -159,7 +159,6 @@ class SelfAttention(nn.Module):
        self.q_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
        self.out_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
        self.rotary_dim = self.head_dim
        # TODO: handle rotary
        sin, cos = fixed_pos_embedding(dim=self.rotary_dim, seq_len=max_positions)
        self.register_buffer("sin", sin)
        self.register_buffer("cos", cos)
@@ -191,7 +190,7 @@ class SelfAttention(nn.Module):
        x = _merge_heads(x, self.n_head, self.head_dim)
        x = self.out_proj(x)
        return x # a, present, (attentions)
        return x
class FeedForward(nn.Module):
    def __init__(self, dim=768, hidden_dim=768*4, activation=nn.GELU, device="cuda", dtype=torch.float16):
@@ -278,7 +277,7 @@ class GPTLM(nn.Module):
    def forward(self, x):
        return

def load_gpt_j(state_dict=None, path=None):
def load_gpt_j(path="models/6b", state_dict=None):
    config = {
        "n_layer": 28,
        "n_head": 16,
@@ -288,8 +287,7 @@ def load_gpt_j(state_dict=None, path=None):
        "activation": gelu_new,
        "Layer": GPTLayer
    }
    model = GPTModel.load(config, path=path)
    model = GPTModel.load(config, path, state_dict)
    return model
def init_6b():
@@ -302,7 +300,7 @@ def init_6b():
        "activation": gelu_new,
        "Layer": GPTLayer
    }
    model = GPTModel(**config)
    model = GPTModel.init(config)
    return model
def init_125m():
@@ -331,4 +329,4 @@ def init_1_3b():
    }
    model = GPTModel(**config)
    return model
    return model
\ No newline at end of file
@@ -71,11 +71,13 @@ with torch.no_grad():
    timeit(lambda: module(torch.zeros((1, 1000)).long().cuda()), n=20, first=False)
    '''
    module = torch.jit.trace(model, torch.zeros((1, 2048)).long().cuda())
    module = torch.jit.optimize_for_inference(module)  # returns a new module; assign it
    static_input = torch.zeros((1, 2048), device='cuda').long()
    static_out = torch.randn((1, 2048, 2048), device='cuda').half()
    timeit(lambda: module(static_input), n=20, first=False)
    timeit(lambda: model(static_input), n=1000, first=False)
    module = torch.jit.trace(model, torch.zeros((1, 2048)).long().cuda())
    module = torch.jit.optimize_for_inference(module)
    timeit(lambda: module(static_input), n=1000, first=False)

    # Warm up on a side stream (the usual prelude to CUDA graph capture).
    s = torch.cuda.Stream()
    s.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(s):
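        # The canonical continuation of this warmup pattern, per the PyTorch
        # CUDA Graphs recipe (a sketch only; the actual lines are collapsed in
        # this diff and may differ):
        #   for _ in range(3):
        #       static_out = model(static_input)
        # torch.cuda.current_stream().wait_stream(s)
        # g = torch.cuda.CUDAGraph()
        # with torch.cuda.graph(g):
        #     static_out = model(static_input)  # replay later with g.replay()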
@@ -10,7 +10,6 @@ config_obj.set_name(name)
config_obj.set_gpu(gpu_name=GPU.RTX_A5000, amount=1)
config_obj.set_ram(16)
config_obj.set_cpu(4)
#config_obj.set_cpu_only()
config_obj.dry_run(dry)
config_obj.print_information()
#config_obj.create_deployment(overwrite=True)
@@ -26,5 +25,7 @@ path = env1.path('/home/xuser/diffusionstorage/workspace/kuru/basedformer')
env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
env1.sh('pip install einops numpy')
env1.sh('pip install tqdm')
env1.sh('pip install /home/xuser/diffusionstorage/workspace/finetune/pokepls/transformers-repo')
with always_rerun():
    path.sh(f'python3 test.py')
\ No newline at end of file
    print(f"Running {sys.argv[1]}")
    path.sh(f'python3 {sys.argv[1]}')
\ No newline at end of file