novelai-storage / Basedformer · Commits

Commit 4141d527
authored Apr 06, 2022 by novelailab
Parent: 34739983
Showing 3 changed files with 9 additions and 6 deletions.
basedformer/gptj.py       +1 -1
basedformer/lm_base.py    +7 -5
basedformer/optimizer.py  +1 -0
basedformer/gptj.py
@@ -202,7 +202,7 @@ class GPTJLayer(nn.Module):
         return x
 
 class GPTJModel(nn.Module):
-    def __init__(self, hidden_dim, n_layer, n_head, vocab_dim, eps, activation=gelu_new, Layer=GPTJLayer, device="cuda", dtype=torch.float16):
+    def __init__(self, hidden_dim, n_layer, n_head, vocab_dim, eps, activation=gelu_new, Layer=GPTJLayer, device="cuda", dtype=torch.float16, **kwargs):
         nn.Module.__init__(self)
         self.n_layer = n_layer
         self.hidden_dim = hidden_dim
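The only functional change above is the trailing **kwargs. A minimal sketch (toy classes, not the real GPTJModel) of why it matters for the lm_base.py change below, where the model is now built with config["model_class"](**config), so the constructor receives every key in the config dict, including ones it does not name:

config = {"model_class": "GPTJModel", "hidden_dim": 4096, "n_layer": 28,
          "n_head": 16, "vocab_dim": 50400, "eps": 1e-5}

class Strict:
    def __init__(self, hidden_dim, n_layer, n_head, vocab_dim, eps):
        pass

class Forgiving:
    def __init__(self, hidden_dim, n_layer, n_head, vocab_dim, eps, **kwargs):
        pass  # extra keys such as "model_class" land in kwargs and are ignored

# Strict(**config)   # would raise TypeError: unexpected keyword argument 'model_class'
Forgiving(**config)  # accepted; **kwargs absorbs the extra key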
basedformer/lm_base.py
@@ -4,6 +4,7 @@ import torch
 from torch import nn
 from basedformer import gptj
 import os
+import json
 
 #Having common BaseLM functionality in this class instead of the torch LM itself makes sense.
 class BaseLM(nn.Module):
@@ -32,7 +33,7 @@ class BaseLM(nn.Module):
     @classmethod
     def init(cls, config):
-        lm = config.model_class(**config)
+        lm = config["model_class"](**config)
         model = cls(config, lm)
         model.init_weights()
         #make this modular later
@@ -46,13 +47,13 @@ class BaseLM(nn.Module):
         return model
 
     @classmethod
-    def load(cls, model_class, config, path=None, state_dict=None, strict=False):
+    def load(cls, config, path=None, state_dict=None, strict=False):
         # I am kinda sad that we will not have a load function in lm object itself.
-        # might be better to add load functions to that as well but not sure.
+        # might be better to add load functions -- actually nope.
         if path:
             state_dict = utils.SplitCheckpoint(path, device="cuda")
 
-        lm = model_class(**config)
+        lm = config["model_class"](**config)
         model = cls(config, lm)
         model.lm.load_state_dict(state_dict, strict=strict)
         return model
@@ -73,11 +74,12 @@ class BaseLM(nn.Module):
 
 def load_gpt_j(path="models/6b", state_dict=None):
     config = {
+        "model_class": gptj.GPTJModel,
         "n_layer": 28,
         "n_head": 16,
         "hidden_dim": 4096,
         "vocab_dim": 50400,
         "eps": 1e-5
     }
-    model = BaseLM.load(gptj.GPTJModel, config, path, state_dict)
+    model = BaseLM.load(config, path, state_dict)
     return model
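Taken together, the lm_base.py changes move the model class into the config dict, so callers no longer pass it separately. A hedged sketch of the resulting call pattern (import path and values are illustrative, following the load_gpt_j defaults above):

from basedformer import gptj, lm_base

config = {
    "model_class": gptj.GPTJModel,  # the class now travels inside the config
    "n_layer": 28,
    "n_head": 16,
    "hidden_dim": 4096,
    "vocab_dim": 50400,
    "eps": 1e-5,
}

# Before this commit: model = lm_base.BaseLM.load(gptj.GPTJModel, config, path="models/6b")
# After it, the config alone identifies the model:
model = lm_base.BaseLM.load(config, path="models/6b")

# Or use the helper, which now builds that config itself:
model = lm_base.load_gpt_j(path="models/6b")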
basedformer/optimizer.py
@@ -10,6 +10,7 @@ def lr_schedule(step, warmup_steps, anneal_steps, lr, end_lr):
     anneal_percent = np.clip(step - warmup_steps, 0, anneal_steps) / anneal_steps
+    #cosine schedule for annealing
     return lr * warmup_percent - (lr - end_lr) * (1 - np.cos(np.pi * anneal_percent)) / 2 #kinda broken. doesn't start from 0
 
 class BasedOptimizer:
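For reference, a self-contained sketch of this schedule. The warmup_percent line sits outside the hunk, so its definition below is an assumption (linear warmup); only the anneal_percent and return lines come from the file:

import numpy as np

def lr_schedule(step, warmup_steps, anneal_steps, lr, end_lr):
    # Assumed linear warmup fraction; the real line is not shown in this diff.
    warmup_percent = np.clip(step, 0, warmup_steps) / warmup_steps
    anneal_percent = np.clip(step - warmup_steps, 0, anneal_steps) / anneal_steps
    # Cosine anneal from lr down to end_lr once warmup is finished.
    return lr * warmup_percent - (lr - end_lr) * (1 - np.cos(np.pi * anneal_percent)) / 2

# Rough shape: ramps to lr over warmup_steps, then decays to end_lr.
print(lr_schedule(0, 100, 1000, 6e-4, 1e-5))     # 0.0 at step 0
print(lr_schedule(100, 100, 1000, 6e-4, 1e-5))   # 6e-4 at the end of warmup
print(lr_schedule(1100, 100, 1000, 6e-4, 1e-5))  # ~1e-5 once annealing is done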