mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-01-10 23:24:46 +01:00
prompt_tuner.py now shows layer configuration
This commit is contained in:
parent
b1c456ec18
commit
09750acfa0
@ -848,7 +848,7 @@ def device_config(config):
|
||||
print(f"{colors.RED}Please enter an integer between -1 and {n_layers}.{colors.END}")
|
||||
|
||||
print(colors.PURPLE + "\nFinal device configuration:")
|
||||
device_list(n_layers)
|
||||
device_list(n_layers, primary=breakmodel.primary_device)
|
||||
|
||||
# If all layers are on the same device, use the old GPU generation mode
|
||||
while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0):
|
||||
|
@ -38,9 +38,19 @@ import breakmodel
|
||||
import torch_lazy_loader
|
||||
import utils
|
||||
|
||||
USE_BREAKMODEL = True
|
||||
use_breakmodel = True
|
||||
|
||||
|
||||
class colors:
    """ANSI escape sequences used to colour console output."""

    # Bright foreground colours, ordered by their SGR code (91-96).
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    PURPLE = '\033[95m'
    CYAN = '\033[96m'

    # Text attributes.
    UNDERLINE = '\033[4m'
    # SGR 0: resets colour and attributes back to the terminal default.
    END = '\033[0m'
|
||||
|
||||
class Send_to_socketio(object):
|
||||
def write(self, bar):
|
||||
print(bar, end="")
|
||||
@ -177,10 +187,29 @@ def patch_transformers():
|
||||
OPTForCausalLM.__init__ = new_init
|
||||
|
||||
|
||||
def device_list(n_layers, primary=None, selected=None):
    """Print a table of the available devices and their layer counts.

    One row is printed per CUDA device reported by torch, followed by a
    "(Disk cache)" row and a "(CPU)" row.

    Args:
        n_layers: Number shown in the LAYERS column of the "(CPU)" row.
        primary: Index of the device to tag with "(primary)". Ignored
            when fewer than two CUDA devices are present.
        selected: Index of the row to mark with "->"; -1 marks the disk
            cache row. None marks nothing.
    """
    num_devices = torch.cuda.device_count()
    if num_devices < 2:
        # With zero or one GPU there is nothing to distinguish as primary.
        primary = None

    # Pad with zeros so that every CUDA device has a layer count to display.
    zero_padding = [0] * (num_devices - len(breakmodel.gpu_blocks))
    gpu_blocks = breakmodel.gpu_blocks + zero_padding

    row_color = colors.END
    sep_color = colors.YELLOW
    # Column separator: a coloured bar that then switches back to the row colour.
    sep = f"{sep_color}|{row_color}"
    blank_tag = ' '*9

    print(f"{colors.YELLOW} DEVICE ID | LAYERS | DEVICE NAME{colors.END}")

    for i, layer_count in zip(range(num_devices), gpu_blocks):
        name = torch.cuda.get_device_name(i)
        if len(name) > 47:
            # Keep the tail of an over-long name, prefixed with an ellipsis.
            name = "..." + name[-44:]
        if i == selected:
            marker = colors.YELLOW + '->' + row_color
        else:
            marker = ' '
        tag = '(primary)' if i == primary else blank_tag
        print(f"{row_color}{marker} {tag} {i:3} {sep} {layer_count:3} {sep} {name}{colors.END}")

    # The disk cache row can be selected with the pseudo-index -1.
    if -1 == selected:
        disk_marker = colors.YELLOW + '->' + row_color
    else:
        disk_marker = ' '
    print(f"{row_color}{disk_marker} {blank_tag} N/A {sep} {breakmodel.disk_blocks:3} {sep} (Disk cache){colors.END}")

    # The CPU row is never selectable, so it carries no marker.
    print(f"{row_color} {blank_tag} N/A {sep} {n_layers:3} {sep} (CPU){colors.END}")
|
||||
|
||||
|
||||
def move_model_to_devices(model, usegpu, gpu_device):
|
||||
global generator
|
||||
|
||||
if(not USE_BREAKMODEL):
|
||||
if(not use_breakmodel):
|
||||
if(usegpu):
|
||||
model = model.half().to(gpu_device)
|
||||
else:
|
||||
@ -703,8 +732,12 @@ class TrainerBase(abc.ABC):
|
||||
n_layers = utils.num_layers(model_config)
|
||||
convert_to_float16 = True
|
||||
hascuda = torch.cuda.is_available()
|
||||
usegpu = not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers
|
||||
usegpu = hascuda and not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers
|
||||
gpu_device = breakmodel_primary_device
|
||||
use_breakmodel = bool(hascuda or breakmodel_disklayers or sum(breakmodel_gpulayers))
|
||||
|
||||
assert len(breakmodel_gpulayers) <= torch.cuda.device_count()
|
||||
assert sum(breakmodel_gpulayers) + breakmodel_disklayers <= n_layers
|
||||
|
||||
breakmodel.disk_blocks = breakmodel_disklayers
|
||||
disk_blocks = breakmodel.disk_blocks
|
||||
@ -712,6 +745,8 @@ class TrainerBase(abc.ABC):
|
||||
ram_blocks = ram_blocks = n_layers - sum(gpu_blocks)
|
||||
cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
|
||||
|
||||
device_list(n_layers, primary=breakmodel.primary_device)
|
||||
|
||||
def lazy_load_callback(model_dict: Dict[str, Union[torch_lazy_loader.LazyTensor, torch.Tensor]], f, **_):
|
||||
if lazy_load_callback.nested:
|
||||
return
|
||||
@ -726,10 +761,10 @@ class TrainerBase(abc.ABC):
|
||||
for key, value in model_dict.items():
|
||||
original_key = get_original_key(key)
|
||||
if isinstance(value, torch_lazy_loader.LazyTensor) and not any(original_key.startswith(n) for n in utils.layers_module_names):
|
||||
device_map[key] = gpu_device if hascuda and usegpu else "cpu" if not hascuda or not USE_BREAKMODEL else breakmodel.primary_device
|
||||
device_map[key] = gpu_device if hascuda and usegpu else "cpu" if not hascuda or not use_breakmodel else breakmodel.primary_device
|
||||
else:
|
||||
layer = int(max((n for n in utils.layers_module_names if original_key.startswith(n)), key=len).rsplit(".", 1)[1])
|
||||
device = gpu_device if hascuda and usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not hascuda or not USE_BREAKMODEL else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
|
||||
device = gpu_device if hascuda and usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not hascuda or not use_breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
|
||||
device_map[key] = device
|
||||
|
||||
if utils.num_shards is None or utils.current_shard == 0:
|
||||
@ -777,9 +812,9 @@ class TrainerBase(abc.ABC):
|
||||
model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
|
||||
# if model_dict[key].dtype is torch.float32:
|
||||
# fp32_model = True
|
||||
if convert_to_float16 and breakmodel.primary_device != "cpu" and hascuda and (USE_BREAKMODEL or usegpu) and model_dict[key].dtype is torch.float32:
|
||||
if convert_to_float16 and breakmodel.primary_device != "cpu" and hascuda and (use_breakmodel or usegpu) and model_dict[key].dtype is torch.float32:
|
||||
model_dict[key] = model_dict[key].to(torch.float16)
|
||||
if breakmodel.primary_device == "cpu" or (not usegpu and not USE_BREAKMODEL and model_dict[key].dtype is torch.float16):
|
||||
if breakmodel.primary_device == "cpu" or (not usegpu and not use_breakmodel and model_dict[key].dtype is torch.float16):
|
||||
model_dict[key] = model_dict[key].to(torch.float32)
|
||||
if device == "shared":
|
||||
model_dict[key] = model_dict[key].to("cpu").detach_()
|
||||
|
Loading…
Reference in New Issue
Block a user