Commit 6ccb1e1f authored by Wes Brown

Fix output location and add `shuffle` parameter.

parent 80b80fad
...@@ -62,10 +62,15 @@ spec: ...@@ -62,10 +62,15 @@ spec:
# The optimizer to use # The optimizer to use
- name: optimizer - name: optimizer
value: 'adamw' value: 'adamw'
# Tokenization sort
- name: tokenize_sort
value: 'size_descending'
- name: save_steps - name: save_steps
value: 500 value: 800
- name: eval_every - name: eval_every
value: 500 value: 800
- name: shuffle
value: "False"
# Whether to *not* resume from checkpoints. # Whether to *not* resume from checkpoints.
- name: no_resume - name: no_resume
value: 'false' value: 'false'
...@@ -76,7 +81,7 @@ spec: ...@@ -76,7 +81,7 @@ spec:
- name: wandb_key - name: wandb_key
value: '' value: ''
- name: project_id - name: project_id
value: 'hypernetwork-training' value: 'hypernetwork-tests'
# CoreWeave region to default to; ORD1 has most of the GPUs. # CoreWeave region to default to; ORD1 has most of the GPUs.
- name: region - name: region
value: 'ORD1' value: 'ORD1'
...@@ -95,7 +100,7 @@ spec: ...@@ -95,7 +100,7 @@ spec:
- name: hypertrainer_image - name: hypertrainer_image
value: 'docker.io/gooseai/basedformer' value: 'docker.io/gooseai/basedformer'
- name: hypertrainer_tag - name: hypertrainer_tag
value: '60893ad' value: '80b80fa'
templates: templates:
- name: main - name: main
...@@ -127,6 +132,8 @@ spec: ...@@ -127,6 +132,8 @@ spec:
value: "{{workflow.parameters.pad_token}}" value: "{{workflow.parameters.pad_token}}"
- name: boundary - name: boundary
value: "{{workflow.parameters.boundary_token}}" value: "{{workflow.parameters.boundary_token}}"
- name: reorder
value: "{{workflow.parameters.tokenize_sort}}"
- - name: hypertrainer - - name: hypertrainer
template: model-hypertrainer template: model-hypertrainer
...@@ -159,7 +166,7 @@ spec: ...@@ -159,7 +166,7 @@ spec:
- name: seed - name: seed
value: "{{workflow.parameters.random_seed}}" value: "{{workflow.parameters.random_seed}}"
- name: output_path - name: output_path
value: "/{{workflow.parameters.pvc}}/hypernets/" value: "/{{workflow.parameters.pvc}}/hypernets/{{workflow.parameters.run_name}}-{{=sprig.replace('/', '_', sprig.replace('.','_', sprig.replace('-','_', workflow.parameters.model)))}}-{{workflow.parameters.context}}-{{workflow.parameters.tokenizer_tag}}"
- name: cache - name: cache
value: "/{{workflow.parameters.pvc}}/cache/" value: "/{{workflow.parameters.pvc}}/cache/"
- name: torch_cache - name: torch_cache
...@@ -178,6 +185,8 @@ spec: ...@@ -178,6 +185,8 @@ spec:
value: "{{workflow.parameters.project_id}}" value: "{{workflow.parameters.project_id}}"
- name: optimizer - name: optimizer
value: "{{workflow.parameters.optimizer}}" value: "{{workflow.parameters.optimizer}}"
- name: shuffle
value: "{{workflow.parameters.shuffle}}"
- name: model-downloader - name: model-downloader
inputs: inputs:
...@@ -225,6 +234,7 @@ spec: ...@@ -225,6 +234,7 @@ spec:
- name: pad - name: pad
- name: output - name: output
- name: boundary - name: boundary
- name: reorder
retryStrategy: retryStrategy:
limit: 1 limit: 1
container: container:
...@@ -237,7 +247,9 @@ spec: ...@@ -237,7 +247,9 @@ spec:
"-input", "{{inputs.parameters.input}}", "-input", "{{inputs.parameters.input}}",
"-output", "{{inputs.parameters.output}}", "-output", "{{inputs.parameters.output}}",
"-boundary", "{{inputs.parameters.boundary}}", "-boundary", "{{inputs.parameters.boundary}}",
"-sanitize"] "-reorder", "{{inputs.parameters.reorder}}",
#"-sanitize"
]
resources: resources:
requests: requests:
memory: 256Mi memory: 256Mi
...@@ -285,6 +297,7 @@ spec: ...@@ -285,6 +297,7 @@ spec:
- name: save_steps - name: save_steps
- name: eval_every - name: eval_every
- name: warmup - name: warmup
- name: shuffle
container: container:
image: "{{workflow.parameters.hypertrainer_image}}:{{workflow.parameters.hypertrainer_tag}}" image: "{{workflow.parameters.hypertrainer_image}}:{{workflow.parameters.hypertrainer_tag}}"
command: [ "/usr/bin/python3", "/app/hypertrain.py" ] command: [ "/usr/bin/python3", "/app/hypertrain.py" ]
...@@ -299,11 +312,12 @@ spec: ...@@ -299,11 +312,12 @@ spec:
"--gas", "{{inputs.parameters.gas}}", "--gas", "{{inputs.parameters.gas}}",
"--save_steps", "{{inputs.parameters.save_steps}}", "--save_steps", "{{inputs.parameters.save_steps}}",
"--eval_every", "{{inputs.parameters.eval_every}}", "--eval_every", "{{inputs.parameters.eval_every}}",
"--output_path", "{{inputs.parameters.output_path}}", "--output", "{{inputs.parameters.output_path}}",
"--project_id", "{{inputs.parameters.project_id}}", "--project_id", "{{inputs.parameters.project_id}}",
"--epochs", "{{inputs.parameters.epochs}}", "--epochs", "{{inputs.parameters.epochs}}",
"--context_size", "{{inputs.parameters.context}}", "--context_size", "{{inputs.parameters.context}}",
"--optimizer", "{{inputs.parameters.optimizer}}"] "--optimizer", "{{inputs.parameters.optimizer}}",
"--shuffle", "{{inputs.parameters.shuffle}}"]
tty: true tty: true
env: env:
- name: WANDB_API_KEY - name: WANDB_API_KEY
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment