mirror of https://github.com/KoboldAI/KoboldAI-Client.git
synced 2024-12-12 08:36:28 +01:00

prompt_tuner.py now shows layer configuration

parent b1c456ec18
commit 09750acfa0
@@ -848,7 +848,7 @@ def device_config(config):
                 print(f"{colors.RED}Please enter an integer between -1 and {n_layers}.{colors.END}")
 
     print(colors.PURPLE + "\nFinal device configuration:")
-    device_list(n_layers)
+    device_list(n_layers, primary=breakmodel.primary_device)
 
     # If all layers are on the same device, use the old GPU generation mode
     while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0):
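The while condition above strips trailing devices that were assigned zero layers (the loop body falls outside this hunk); once a single device holds every layer, the old single-GPU generation mode can take over. A minimal sketch of the effect, with hypothetical numbers:

# Hypothetical split: all 32 layers on GPU 0, none on GPUs 1 and 2.
gpu_blocks = [32, 0, 0]
while(len(gpu_blocks) and gpu_blocks[-1] == 0):
    gpu_blocks.pop()          # assumed loop body: drop the empty trailing device
print(gpu_blocks)             # [32] -> one device holds every layer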
@@ -38,9 +38,19 @@ import breakmodel
 import torch_lazy_loader
 import utils
 
-USE_BREAKMODEL = True
+use_breakmodel = True
 
+
+class colors:
+    PURPLE    = '\033[95m'
+    BLUE      = '\033[94m'
+    CYAN      = '\033[96m'
+    GREEN     = '\033[92m'
+    YELLOW    = '\033[93m'
+    RED       = '\033[91m'
+    END       = '\033[0m'
+    UNDERLINE = '\033[4m'
 
 class Send_to_socketio(object):
     def write(self, bar):
         print(bar, end="")
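The colors class added here is a plain container of standard ANSI terminal escape sequences; END resets the style. A quick usage sketch against the class as defined above:

# Wrap text in an escape sequence and reset afterwards.
print(f"{colors.GREEN}layers assigned successfully{colors.END}")
print(f"{colors.RED}Please enter an integer between -1 and 32.{colors.END}")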
@@ -177,10 +187,29 @@ def patch_transformers():
     OPTForCausalLM.__init__ = new_init
 
 
+def device_list(n_layers, primary=None, selected=None):
+    device_count = torch.cuda.device_count()
+    if(device_count < 2):
+        primary = None
+    gpu_blocks = breakmodel.gpu_blocks + (device_count - len(breakmodel.gpu_blocks))*[0]
+    print(f"{colors.YELLOW}       DEVICE ID  |  LAYERS  |  DEVICE NAME{colors.END}")
+    for i in range(device_count):
+        name = torch.cuda.get_device_name(i)
+        if(len(name) > 47):
+            name = "..." + name[-44:]
+        row_color = colors.END
+        sep_color = colors.YELLOW
+        print(f"{row_color}{colors.YELLOW + '->' + row_color if i == selected else '  '} {'(primary)' if i == primary else ' '*9} {i:3}  {sep_color}|{row_color}  {gpu_blocks[i]:3}  {sep_color}|{row_color}  {name}{colors.END}")
+    row_color = colors.END
+    sep_color = colors.YELLOW
+    print(f"{row_color}{colors.YELLOW + '->' + row_color if -1 == selected else '  '} {' '*9} N/A  {sep_color}|{row_color}  {breakmodel.disk_blocks:3}  {sep_color}|{row_color}  (Disk cache){colors.END}")
+    print(f"{row_color}   {' '*9} N/A  {sep_color}|{row_color}  {n_layers:3}  {sep_color}|{row_color}  (CPU){colors.END}")
+
+
 def move_model_to_devices(model, usegpu, gpu_device):
     global generator
 
-    if(not USE_BREAKMODEL):
+    if(not use_breakmodel):
         if(usegpu):
             model = model.half().to(gpu_device)
         else:
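For a sense of what device_list prints, assume two GPUs with 18 and 10 layers, 2 disk-cache layers, and n_layers passed as the remaining CPU layers; the table would look roughly like this (device names, layer counts, and exact alignment are illustrative):

       DEVICE ID  |  LAYERS  |  DEVICE NAME
   (primary)   0  |   18     |  NVIDIA GeForce RTX 3090
               1  |   10     |  NVIDIA GeForce RTX 3080
             N/A  |    2     |  (Disk cache)
             N/A  |    2     |  (CPU)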
@@ -703,8 +732,12 @@ class TrainerBase(abc.ABC):
         n_layers = utils.num_layers(model_config)
         convert_to_float16 = True
         hascuda = torch.cuda.is_available()
-        usegpu = not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers
+        usegpu = hascuda and not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers
         gpu_device = breakmodel_primary_device
+        use_breakmodel = bool(hascuda or breakmodel_disklayers or sum(breakmodel_gpulayers))
+
+        assert len(breakmodel_gpulayers) <= torch.cuda.device_count()
+        assert sum(breakmodel_gpulayers) + breakmodel_disklayers <= n_layers
 
         breakmodel.disk_blocks = breakmodel_disklayers
         disk_blocks = breakmodel.disk_blocks
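The new assertions pin down the invariant the loader depends on: there cannot be more per-GPU entries than CUDA devices, and GPU plus disk layers cannot exceed the model's depth. A worked example of the arithmetic the next hunk builds on (all numbers hypothetical):

import itertools

n_layers = 32                         # hypothetical model depth
breakmodel_gpulayers = [18, 10]       # layers on GPU 0 and GPU 1
breakmodel_disklayers = 2

assert sum(breakmodel_gpulayers) + breakmodel_disklayers <= n_layers

ram_blocks = n_layers - sum(breakmodel_gpulayers)                          # 4 layers stay off-GPU
cumulative_gpu_blocks = tuple(itertools.accumulate(breakmodel_gpulayers)) # (18, 28)
print(ram_blocks, cumulative_gpu_blocks)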
@@ -712,6 +745,8 @@ class TrainerBase(abc.ABC):
         ram_blocks = ram_blocks = n_layers - sum(gpu_blocks)
         cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
 
+        device_list(n_layers, primary=breakmodel.primary_device)
+
         def lazy_load_callback(model_dict: Dict[str, Union[torch_lazy_loader.LazyTensor, torch.Tensor]], f, **_):
             if lazy_load_callback.nested:
                 return
@@ -726,10 +761,10 @@ class TrainerBase(abc.ABC):
             for key, value in model_dict.items():
                 original_key = get_original_key(key)
                 if isinstance(value, torch_lazy_loader.LazyTensor) and not any(original_key.startswith(n) for n in utils.layers_module_names):
-                    device_map[key] = gpu_device if hascuda and usegpu else "cpu" if not hascuda or not USE_BREAKMODEL else breakmodel.primary_device
+                    device_map[key] = gpu_device if hascuda and usegpu else "cpu" if not hascuda or not use_breakmodel else breakmodel.primary_device
                 else:
                     layer = int(max((n for n in utils.layers_module_names if original_key.startswith(n)), key=len).rsplit(".", 1)[1])
-                    device = gpu_device if hascuda and usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not hascuda or not USE_BREAKMODEL else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
+                    device = gpu_device if hascuda and usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not hascuda or not use_breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
                     device_map[key] = device
 
             if utils.num_shards is None or utils.current_shard == 0:
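The chained conditional expressions above are dense; read left to right they encode a priority order. An equivalent, more explicit sketch of the same placement logic (the helper name is hypothetical):

import bisect

def pick_device(layer, hascuda, usegpu, use_breakmodel, gpu_device,
                disk_blocks, ram_blocks, cumulative_gpu_blocks):
    # Everything on one GPU: the old single-GPU mode wins outright.
    if hascuda and usegpu:
        return gpu_device
    # Early layers may spill to disk, but only those below the RAM threshold.
    if layer < disk_blocks and layer < ram_blocks:
        return "disk"
    # No CUDA, or breakmodel disabled: plain CPU tensors.
    if not hascuda or not use_breakmodel:
        return "cpu"
    # Remaining off-GPU layers live in shared (CPU) memory...
    if layer < ram_blocks:
        return "shared"
    # ...and GPU layers are mapped to a device index by bisecting the
    # cumulative per-GPU block counts.
    return bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)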
@@ -777,9 +812,9 @@ class TrainerBase(abc.ABC):
                     model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
                     # if model_dict[key].dtype is torch.float32:
                     #     fp32_model = True
-                    if convert_to_float16 and breakmodel.primary_device != "cpu" and hascuda and (USE_BREAKMODEL or usegpu) and model_dict[key].dtype is torch.float32:
+                    if convert_to_float16 and breakmodel.primary_device != "cpu" and hascuda and (use_breakmodel or usegpu) and model_dict[key].dtype is torch.float32:
                         model_dict[key] = model_dict[key].to(torch.float16)
-                    if breakmodel.primary_device == "cpu" or (not usegpu and not USE_BREAKMODEL and model_dict[key].dtype is torch.float16):
+                    if breakmodel.primary_device == "cpu" or (not usegpu and not use_breakmodel and model_dict[key].dtype is torch.float16):
                         model_dict[key] = model_dict[key].to(torch.float32)
                     if device == "shared":
                         model_dict[key] = model_dict[key].to("cpu").detach_()
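The dtype handling above encodes a simple policy: weights are halved to float16 only when a CUDA device will host them, and fall back to float32 whenever they end up CPU-bound. A condensed restatement of that rule (a sketch, not the commit's code; the function name is hypothetical):

import torch

def target_dtype(tensor, convert_to_float16, primary_device, hascuda,
                 use_breakmodel, usegpu):
    # GPU-bound float32 weights are converted to float16 to save VRAM...
    if (convert_to_float16 and primary_device != "cpu" and hascuda
            and (use_breakmodel or usegpu) and tensor.dtype is torch.float32):
        return torch.float16
    # ...while CPU-bound weights must stay (or return to) full precision,
    # since float16 math is poorly supported on CPU.
    if primary_device == "cpu" or (not usegpu and not use_breakmodel
                                   and tensor.dtype is torch.float16):
        return torch.float32
    return tensor.dtype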