Basedformer (novelai-storage) — Commit 7a721f81

Authored Feb 20, 2022 by novelailab
Parent: 7eebf8ad

Commit message: update

Showing 5 changed files with 93 additions and 12 deletions (+93 −12):
.gitignore      +4   −0
gptj.py         +76  −0
main.py         +5   −7
test.py         +5   −3
test_pyfra.py   +3   −2
.gitignore
View file @ 7a721f81

...
@@ -127,3 +127,7 @@ dmypy.json
 # Pyre type checker
 .pyre/
+models
+gptjconvert
+j6b_vanilla
gptj.py
0 → 100644
View file @ 7a721f81

from main import *
import time
from time import perf_counter, perf_counter_ns
import numpy as np
from tqdm import tqdm
from contextlib import contextmanager
from transformers import (
    AutoModelForCausalLM,
    GPTNeoForCausalLM,
    AutoConfig,
)

# replicating the %timeit magic function of IPython
def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True):
    precision = 'ns'
    r_arr = np.empty([2, r])  # [0] = mean, [1] = std
    if function:
        func.__name__ = function.__name__

    for i in tqdm(range(r)) if do_tqdm else range(r):
        n_arr = np.empty(n)
        for k in range(n):
            start = perf_counter_ns()
            func()
            n_arr[k] = perf_counter_ns() - start

        if not first:
            # delete the first (warm-up) element from the n_arr numpy array
            n_arr = np.delete(n_arr, 0)

        r_arr[0, i] = np.mean(n_arr)
        r_arr[1, i] = np.std(n_arr)

    best = r_arr[:, np.argmin(r_arr[0])]  # [0] = mean, [1] = std

    # pick a display unit based on the best mean (measured in ns)
    if best[0] < 1e3:
        precision = 'ns'
    elif best[0] >= 1e9:
        best[0] = best[0] * 1e-9
        best[1] = best[1] * 1e-9
        precision = 's'
    elif best[0] >= 1e6:
        best[0] = best[0] * 1e-6
        best[1] = best[1] * 1e-6
        precision = 'ms'
    elif best[0] >= 1e3:
        precision = 'μs'
        best[0] = best[0] * 1e-3
        best[1] = best[1] * 1e-3

    if not quiet:
        if precision == 'ns':
            print(f"{func.__name__}: {best[0]:.0f}{precision} ± {best[1]:.0f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")
        elif precision == 'μs':
            print(f"{func.__name__}: {best[0]:.2f}{precision} ± {best[1]:.2f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")
        elif precision == 'ms':
            print(f"{func.__name__}: {best[0]:.2f}{precision} ± {best[1]:.2f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")
        elif precision == 's':
            print(f"{func.__name__}: {best[0]:.4f}{precision} ± {best[1]:.4f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")
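# Hypothetical usage sketch (not part of this commit): the helper above is
# called like IPython's %timeit; first=False drops the first (warm-up) timing
# of each run. Example:
#   a = torch.randn(1024, 1024, device='cuda').half()
#   timeit(lambda: a @ a, r=3, n=10, first=False)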
with torch.no_grad():
    model = load_gpt_j().cuda().half().eval()
    x = torch.zeros(1, 2048).cuda().long()
    our = model(x)
    print(our.shape)

    del model
    model = no_init(lambda: AutoModelForCausalLM.from_pretrained('/home/xuser/models/j6b_ckpt_14001')).cuda().half().eval()
    hf = model(x, output_hidden_states=True)["hidden_states"][-1]

    print(our[0, 2047, 1000:1020])
    print(hf[0, 2047, 1000:1020])
    print(hf.shape)
\ No newline at end of file
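no_init comes in via from main import * and its definition is not part of this diff. A common implementation of the idea it names, skipping PyTorch's random weight initialization so the checkpoint load is the only write to the parameters, is sketched below as an assumption, not the repo's actual code:

    # Assumed sketch of a no_init helper; the real one lives in main.py and may
    # differ. torch is taken to be imported already.
    def no_init(loading_code):
        # temporarily replace the in-place init routines with no-ops so that
        # constructing the model does not spend time randomizing weights that
        # the checkpoint load will immediately overwrite
        def dummy(tensor, *args, **kwargs):
            return tensor

        originals = (torch.nn.init.kaiming_uniform_,
                     torch.nn.init.uniform_,
                     torch.nn.init.normal_)
        torch.nn.init.kaiming_uniform_ = dummy
        torch.nn.init.uniform_ = dummy
        torch.nn.init.normal_ = dummy
        try:
            model = loading_code()
        finally:
            (torch.nn.init.kaiming_uniform_,
             torch.nn.init.uniform_,
             torch.nn.init.normal_) = originals
        return model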
main.py
View file @ 7a721f81

...
@@ -159,7 +159,6 @@ class SelfAttention(nn.Module):
         self.q_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
         self.out_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
         self.rotary_dim = self.head_dim  # TODO: handle rotary
         sin, cos = fixed_pos_embedding(dim=self.rotary_dim, seq_len=max_positions)
         self.register_buffer("sin", sin)
         self.register_buffer("cos", cos)
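fixed_pos_embedding is defined elsewhere in main.py and not shown in this hunk. The standard GPT-J recipe it refers to, one sinusoid frequency per pair of rotary channels evaluated at every position, would look roughly like this (an assumed sketch, not the committed definition):

    # assumed sketch of fixed_pos_embedding following the usual GPT-J rotary recipe
    def fixed_pos_embedding(dim, seq_len):
        inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim))
        angles = torch.einsum("i,j->ij", torch.arange(seq_len).float(), inv_freq)
        return torch.sin(angles), torch.cos(angles)  # each (seq_len, dim // 2)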
...
@@ -191,7 +190,7 @@ class SelfAttention(nn.Module):
         x = _merge_heads(x, self.n_head, self.head_dim)
         x = self.out_proj(x)
-        return x  # a, present, (attentions)
+        return x

 class FeedForward(nn.Module):
     def __init__(self, dim=768, hidden_dim=768*4, activation=nn.GELU, device="cuda", dtype=torch.float16):
...
@@ -278,7 +277,7 @@ class GPTLM(nn.Module):
     def forward(self, x):
         return

-def load_gpt_j(state_dict=None, path=None):
+def load_gpt_j(path="models/6b", state_dict=None):
     config = {
         "n_layer": 28,
         "n_head": 16,
...
@@ -288,8 +287,7 @@ def load_gpt_j(state_dict=None, path=None):
         "activation": gelu_new,
         "Layer": GPTLayer
     }
-    model = GPTModel.load(config, path=path)
+    model = GPTModel.load(config, path, state_dict)
     return model

 def init_6b():
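With the reordered signature, callers can rely on the default checkpoint location; a typical use (the alternate path below is hypothetical):

    model = load_gpt_j()                       # loads from the default "models/6b"
    model = load_gpt_j(path="models/6b-alt")   # hypothetical alternate checkpoint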
...
@@ -302,7 +300,7 @@ def init_6b():
         "activation": gelu_new,
         "Layer": GPTLayer
     }
-    model = GPTModel(**config)
+    model = GPTModel.init(config)
     return model

 def init_125m():
...
@@ -331,4 +329,4 @@ def init_1_3b():
     }
     model = GPTModel(**config)
-    return model
+    return model
\ No newline at end of file
test.py
View file @ 7a721f81

...
@@ -71,11 +71,13 @@ with torch.no_grad():
     timeit(lambda: module(torch.zeros((1, 1000)).long().cuda()), n=20, first=False)
     '''
-    module = torch.jit.trace(model, torch.zeros((1, 2048)).long().cuda())
-    torch.jit.optimize_for_inference(module)
     static_input = torch.zeros((1, 2048), device='cuda').long()
     static_out = torch.randn((1, 2048, 2048), device='cuda').half()
-    timeit(lambda: module(static_input), n=20, first=False)
+    timeit(lambda: model(static_input), n=1000, first=False)
+    module = torch.jit.trace(model, torch.zeros((1, 2048)).long().cuda())
+    torch.jit.optimize_for_inference(module)
+    timeit(lambda: module(static_input), n=1000, first=False)
     s = torch.cuda.Stream()
     s.wait_stream(torch.cuda.current_stream())
     with torch.cuda.stream(s):
...
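The hunk ends just as a side stream is set up, which is the first step of PyTorch's CUDA Graphs capture recipe (beta in torch 1.10). The continuation is not part of this diff; a sketch of how that recipe usually proceeds with the static_input/static_out buffers allocated above:

    # assumed continuation following the standard torch.cuda.CUDAGraph recipe;
    # not part of this commit's diff
    with torch.cuda.stream(s):
        for _ in range(3):                    # warm-up passes on the side stream
            static_out = model(static_input)
    torch.cuda.current_stream().wait_stream(s)

    g = torch.cuda.CUDAGraph()
    with torch.cuda.graph(g):                 # record a single forward pass
        static_out = model(static_input)

    timeit(lambda: g.replay(), n=1000, first=False)  # replay the captured kernels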
test_pyfra.py
View file @ 7a721f81

...
@@ -10,7 +10,6 @@ config_obj.set_name(name)
 config_obj.set_gpu(gpu_name=GPU.RTX_A5000, amount=1)
 config_obj.set_ram(16)
 config_obj.set_cpu(4)
 #config_obj.set_cpu_only()
 config_obj.dry_run(dry)
 config_obj.print_information()
 #config_obj.create_deployment(overwrite=True)
...
@@ -26,5 +25,7 @@ path = env1.path('/home/xuser/diffusionstorage/workspace/kuru/basedformer')
 env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
 env1.sh('pip install einops numpy')
 env1.sh('pip install tqdm')
 env1.sh('pip install /home/xuser/diffusionstorage/workspace/finetune/pokepls/transformers-repo')
 with always_rerun():
-    path.sh(f'python3 test.py')
\ No newline at end of file
+    print(f"Running {sys.argv[1]}")
+    path.sh(f'python3 {sys.argv[1]}')
\ No newline at end of file
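The runner now takes the target script from the command line instead of hard-coding test.py, which assumes sys is imported at the top of the script (the import is not visible in this hunk). A typical invocation, with the script name purely illustrative:

    python3 test_pyfra.py gptj.py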