Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
Accelerate: Remove HAS_ACCELERATE
Accelerate has been a dependency for a while, and as such we probably shouldn't be lugging around code that assumes it isn't present.
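
In short, the commit deletes the optional-dependency probe in utils.py and every call-site branch keyed off it. A minimal sketch of the pattern being removed (hypothetical helper name plan_disk_layers; not the exact KoboldAI code):

    # Before: probe for the library and branch on a module-level flag.
    try:
        import accelerate
        HAS_ACCELERATE = True
    except ImportError:
        HAS_ACCELERATE = False

    def plan_disk_layers(requested: int) -> int:
        # Hypothetical call site: disk offload only made sense when accelerate was available.
        return requested if HAS_ACCELERATE else 0

    # After this commit: accelerate is a hard requirement, so the flag disappears and the
    # call site collapses to "return requested"; a missing install now fails loudly at
    # import time instead of silently disabling disk offload.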
aiserver.py (20 changed lines)

@@ -1458,8 +1458,6 @@ def get_model_info(model, directory=""):
         pass
     #elif model == 'customhuggingface':
     #    show_custom_model_box = True
-    elif not utils.HAS_ACCELERATE and not torch.cuda.is_available():
-        pass
     elif args.cpu:
         pass
     else:
@@ -1486,13 +1484,13 @@ def get_model_info(model, directory=""):
             break_values += [0] * (gpu_count - len(break_values))
     emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url,
                          'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
-                         'disk_break_value': disk_blocks, 'accelerate': utils.HAS_ACCELERATE,
+                         'disk_break_value': disk_blocks, 'accelerate': True,
                          'break_values': break_values, 'gpu_count': gpu_count,
                          'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url,
                          'show_custom_model_box': show_custom_model_box}, broadcast=True, room="UI_1")
     emit('selected_model_info', {'key_value': key_value, 'key':key,
                          'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url,
-                         'disk_break_value': disk_blocks, 'disk_break': utils.HAS_ACCELERATE,
+                         'disk_break_value': disk_blocks, 'disk_break': True,
                          'break_values': break_values, 'gpu_count': gpu_count,
                          'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select,
                          'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False,
@@ -1525,7 +1523,7 @@ def get_layer_count(model, directory=""):
         else:
             model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache")
         try:
-            if ((utils.HAS_ACCELERATE and model_config.model_type != 'gpt2') or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel:
+            if (model_config.model_type != 'gpt2' or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel:
                 return utils.num_layers(model_config)
             else:
                 return None
@@ -1819,12 +1817,12 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     # loadsettings()
     logger.init("GPU support", status="Searching")
     koboldai_vars.hascuda = torch.cuda.is_available() and not args.cpu
-    koboldai_vars.bmsupported = ((utils.HAS_ACCELERATE and koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel
+    koboldai_vars.bmsupported = ((koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel
     if(args.breakmodel is not None and args.breakmodel):
         logger.warning("--breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).")
     if(args.breakmodel_layers is not None):
         logger.warning("--breakmodel_layers is deprecated. Use --breakmodel_gpulayers instead (see --help for details).")
-    if(args.model and koboldai_vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers and (not utils.HAS_ACCELERATE or not args.breakmodel_disklayers)):
+    if(args.model and koboldai_vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers and (not args.breakmodel_disklayers)):
         logger.warning("Model launched without the --breakmodel_gpulayers argument, defaulting to GPU only mode.")
         koboldai_vars.bmsupported = False
     if(not koboldai_vars.bmsupported and (args.breakmodel_gpulayers is not None or args.breakmodel_layers is not None or args.breakmodel_disklayers is not None)):
@@ -2206,7 +2204,7 @@ def lua_decode(tokens):
         from transformers import GPT2Tokenizer
         global tokenizer
         tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
-    return utils.decodenewlines(tokenizer.decode(tokens))
+    return utils.decodenewlines(mtokenizer.decode(tokens))
 
 #==================================================================#
 # Encode string into list of token IDs using current tokenizer
@@ -3053,8 +3051,6 @@ def get_message(msg):
         if not os.path.exists("settings/"):
             os.mkdir("settings")
         changed = True
-        if not utils.HAS_ACCELERATE:
-            msg['disk_layers'] = "0"
         if os.path.exists("settings/" + koboldai_vars.model_selected.replace('/', '_') + ".breakmodel"):
             with open("settings/" + koboldai_vars.model_selected.replace('/', '_') + ".breakmodel", "r") as file:
                 data = file.read().split('\n')[:2]
@@ -3995,7 +3991,7 @@ def generate(txt, minimum, maximum, found_entries=None):
 
     if not koboldai_vars.quiet:
         logger.debug(f"Prompt Min:{minimum}, Max:{maximum}")
-        logger.prompt(utils.decodenewlines(tokenizer.decode(txt)).encode("unicode_escape").decode("utf-8"))
+        logger.prompt(utils.decodenewlines(model.tokenizer.decode(txt)).encode("unicode_escape").decode("utf-8"))
 
     # Store context in memory to use it for comparison with generated content
     koboldai_vars.lastctx = utils.decodenewlines(tokenizer.decode(txt))
@@ -6384,8 +6380,6 @@ def UI_2_load_model(data):
     if not os.path.exists("settings/"):
         os.mkdir("settings")
     changed = True
-    if not utils.HAS_ACCELERATE:
-        data['disk_layers'] = "0"
    if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"):
        with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file:
            file_data = file.read().split('\n')[:2]
|
@@ -235,11 +235,9 @@ gpu_blocks = []
 disk_blocks = 0
 primary_device = 0 if torch.cuda.device_count() > 0 else "cpu"
 
-if utils.HAS_ACCELERATE:
-    from accelerate.hooks import attach_align_device_hook_on_blocks
-    from accelerate.utils import OffloadedWeightsLoader, check_device_map, extract_submodules_state_dict, offload_state_dict
-    from accelerate import dispatch_model
-
+from accelerate.hooks import attach_align_device_hook_on_blocks
+from accelerate.utils import OffloadedWeightsLoader, check_device_map, extract_submodules_state_dict, offload_state_dict
+from accelerate import dispatch_model
 
 def dispatch_model_ex(
     model: nn.Module,
|
model.py (38 changed lines)

@@ -1779,18 +1779,17 @@ class HFTorchInferenceModel(InferenceModel):
 
             if utils.num_shards is None or utils.current_shard == 0:
                 utils.offload_index = {}
-                if utils.HAS_ACCELERATE:
-                    if os.path.isdir("accelerate-disk-cache"):
-                        # Delete all of the files in the disk cache folder without deleting the folder itself to allow people to create symbolic links for this folder
-                        # (the folder doesn't contain any subfolders so os.remove will do just fine)
-                        for filename in os.listdir("accelerate-disk-cache"):
-                            try:
-                                os.remove(
-                                    os.path.join("accelerate-disk-cache", filename)
-                                )
-                            except OSError:
-                                pass
-                    os.makedirs("accelerate-disk-cache", exist_ok=True)
+                if os.path.isdir("accelerate-disk-cache"):
+                    # Delete all of the files in the disk cache folder without deleting the folder itself to allow people to create symbolic links for this folder
+                    # (the folder doesn't contain any subfolders so os.remove will do just fine)
+                    for filename in os.listdir("accelerate-disk-cache"):
+                        try:
+                            os.remove(
+                                os.path.join("accelerate-disk-cache", filename)
+                            )
+                        except OSError:
+                            pass
+                os.makedirs("accelerate-disk-cache", exist_ok=True)
                 if utils.num_shards is not None:
                     num_tensors = len(
                         utils.get_sharded_checkpoint_num_tensors(
@@ -1883,7 +1882,7 @@ class HFTorchInferenceModel(InferenceModel):
                     model_dict[key] = model_dict[key].to(torch.float32)
                 if device == "shared":
                     model_dict[key] = model_dict[key].to("cpu").detach_()
-                    if able_to_pin_layers and utils.HAS_ACCELERATE:
+                    if able_to_pin_layers:
                         try:
                             model_dict[key] = model_dict[key].pin_memory()
                         except:
@@ -1987,10 +1986,9 @@ class HFTorchInferenceModel(InferenceModel):
             )
         row_color = colors.END
         sep_color = colors.YELLOW
-        if utils.HAS_ACCELERATE:
-            print(
-                f"{row_color}{colors.YELLOW + '->' + row_color if -1 == selected else ' '} {' '*9} N/A {sep_color}|{row_color} {breakmodel.disk_blocks:3} {sep_color}|{row_color} (Disk cache){colors.END}"
-            )
+        print(
+            f"{row_color}{colors.YELLOW + '->' + row_color if -1 == selected else ' '} {' '*9} N/A {sep_color}|{row_color} {breakmodel.disk_blocks:3} {sep_color}|{row_color} (Disk cache){colors.END}"
+        )
         print(
             f"{row_color} {' '*9} N/A {sep_color}|{row_color} {n_layers:3} {sep_color}|{row_color} (CPU){colors.END}"
         )
@@ -2007,9 +2005,7 @@ class HFTorchInferenceModel(InferenceModel):
             breakmodel.gpu_blocks = [0] * n_layers
             return
 
-        elif utils.args.breakmodel_gpulayers is not None or (
-            utils.HAS_ACCELERATE and utils.args.breakmodel_disklayers is not None
-        ):
+        elif utils.args.breakmodel_gpulayers is not None or utils.args.breakmodel_disklayers is not None:
             try:
                 if not utils.args.breakmodel_gpulayers:
                     breakmodel.gpu_blocks = []
@@ -2117,7 +2113,7 @@ class HFTorchInferenceModel(InferenceModel):
                 if n_layers == 0:
                     break
 
-        if utils.HAS_ACCELERATE and n_layers > 0:
+        if n_layers > 0:
            self.breakmodel_device_list(
                n_layers, primary=breakmodel.primary_device, selected=-1
            )
|
@@ -303,7 +303,7 @@ def use_lazy_torch_load(enable=True, callback: Optional[Callable] = None, demate
        torch.load = torch_load
 
        if dematerialized_modules:
-            if use_accelerate_init_empty_weights and utils.HAS_ACCELERATE:
+            if use_accelerate_init_empty_weights:
                import accelerate
                init_empty_weights = accelerate.init_empty_weights()
                init_empty_weights.__enter__()
@@ -334,7 +334,7 @@ def use_lazy_torch_load(enable=True, callback: Optional[Callable] = None, demate
        torch._utils._rebuild_tensor = old_rebuild_tensor
        torch.load = old_torch_load
        if dematerialized_modules:
-            if use_accelerate_init_empty_weights and utils.HAS_ACCELERATE:
+            if use_accelerate_init_empty_weights:
                init_empty_weights.__exit__(None, None, None)
            else:
                torch.nn.Linear.__init__ = old_linear_init
|
utils.py (7 changed lines)

@@ -9,7 +9,6 @@ import requests
 import requests.adapters
 import time
 import breakmodel
-from transformers import __version__ as transformers_version
 from transformers import PreTrainedModel
 import packaging.version
 from tqdm.auto import tqdm
@@ -21,12 +20,6 @@ import packaging.version
 from pathlib import Path
 from typing import List, Optional
 
-HAS_ACCELERATE = packaging.version.parse(transformers_version) >= packaging.version.parse("4.20.0.dev0")
-try:
-    import accelerate
-except ImportError:
-    HAS_ACCELERATE = False
-
 koboldai_vars = None
 args = None
 num_shards: Optional[int] = None
|