-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathastra_config.py
More file actions
116 lines (111 loc) · 5.46 KB
/
astra_config.py
File metadata and controls
116 lines (111 loc) · 5.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from peft.tuners.lora.config import LoraConfig
from dataclasses import dataclass, field
from typing import Optional
# Public names exported by a star-import of this module.
__all__ = ["AstraConfig", "MyLoraConfig"]
@dataclass
class AstraConfig:
    """
    This is the sub-configuration class to store the configuration of a [`LoraModel`] for Astra.

    Every field has a default, so `AstraConfig()` is valid. Arguments are listed in field-declaration order, which
    is also the positional order of the generated `__init__`.

    Args:
        cache_file (`Optional[str]`):
            File to store the SVD cache. The SVD cache is much smaller than the residual model (for example, residual
            model of Llama-3-8b is 15GB, while SVD cache is 1.4GB), but with SVD cache and original model weights,
            residual model weights can be built quickly. If you need to reuse residual model weights with limited
            storage, you can store the SVD cache instead. Defaults to `None`.
        covariance_file (`Optional[str]`):
            File to store the covariance matrix. If you wish to train multiple models with different ranks, but they
            sample from the same dataset, you can store the covariance matrix and reuse it for different ranks. Note
            that covariance file is usually large (comparable to model size), so you will need sufficient storage.
            Defaults to `None`.
        astra_method (`str`):
            The method used for Astra. 'IPM' stands for Instruction-Previewed Mode, which focuses on adapting to
            downstream tasks. 'KPM' stands for Knowledge-Previewed Mode, which focuses on preserving the original
            knowledge of the model when adapting to downstream tasks. Defaults to 'IPM'. The value is stored as
            given; this class performs no validation of it.
        verbose (`bool`):
            If true, prints the progress of Astra initialization. Defaults to `False`.
        use_float16_for_covariance (`bool`):
            If true, uses float16 for the covariance matrix. This can reduce the memory usage of the covariance matrix
            by half, but may lead to numerical instability. Defaults to `False`.
        prune_temporary_fields (`bool`):
            If true, temporary fields generated in Astra preprocessing will be pruned. Defaults to `True`.
        rank_allocation (`bool`):
            Whether to perform dynamic rank allocation. If True, dynamic rank allocation will be performed during
            preprocessing. Defaults to `False`.
        rank_pattern (`Optional[str]`):
            Path to cache file for dynamic rank allocation results. If specified, dynamic rank allocation results
            will be loaded/saved here. Defaults to `None`.
    """

    cache_file: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "File to store the SVD cache. The SVD cache is much smaller than the residual model (for example, "
                "residual model of Llama-3-8b is 15GB, while SVD cache is 1.4GB), but with SVD cache and original model "
                "weights, residual model weights can be built quickly. If you need to reuse residual model weights with "
                "limited storage, you can store the SVD cache instead."
            )
        },
    )
    covariance_file: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "File to store the covariance matrix. If you wish to train multiple models with different ranks, but "
                "they sample from the same dataset, you can store the covariance matrix and reuse it for different ranks. "
                "Note that covariance file is usually large (comparable to model size), so you will need sufficient storage."
            )
        },
    )
    # NOTE: this field sits third on purpose -- moving it would change the positional
    # argument order of the generated __init__ and break positional callers.
    astra_method: str = field(
        default="IPM",
        metadata={
            "help": (
                "The method used for Astra. 'IPM' stands for Instruction-Previewed Mode, which focuses on adapting to "
                "downstream tasks. 'KPM' stands for Knowledge-Previewed Mode, which focuses on preserving the original "
                "knowledge of the model when adapting to downstream tasks."
            )
        },
    )
    verbose: bool = field(default=False, metadata={"help": "If true, prints the progress of Astra initialization."})
    use_float16_for_covariance: bool = field(
        default=False,
        metadata={
            "help": (
                "If true, uses float16 for the covariance matrix. This can reduce the memory usage of the covariance matrix "
                "by half, but may lead to numerical instability."
            )
        },
    )
    prune_temporary_fields: bool = field(
        default=True, metadata={"help": "If true, temporary fields generated in Astra preprocessing will be pruned."}
    )
    rank_allocation: bool = field(
        default=False,
        metadata={
            "help": (
                "Whether to perform dynamic rank allocation. If True, dynamic rank allocation will be performed during preprocessing. "
                "Defaults to False."
            )
        },
    )
    rank_pattern: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "Path to cache file for dynamic rank allocation results. If specified, dynamic rank allocation results will be loaded/saved here. "
                "Defaults to None."
            )
        },
    )
@dataclass
class MyLoraConfig(LoraConfig):
    """
    [`LoraConfig`] subclass that adds a single extra field, `astra_config`.

    Args:
        astra_config (`Optional[AstraConfig]`):
            The configuration of Astra. According to the field's help text, passing it means Astra will be used to
            build the adapter layers, and `init_lora_weights='Astra'` should be set as well. Defaults to `None`.
    """

    astra_config: Optional[AstraConfig] = field(
        metadata={
            "help": (
                "The configuration of Astra. "
                "If this is passed, then Astra will be used to build the adapter layers. "
                "Also set `init_lora_weights='Astra'` in this case."
            ),
        },
        default=None,
    )