From a73804ca1e2790e468b917689ba0f4edf9a54d6a Mon Sep 17 00:00:00 2001
From: somebody
Date: Sun, 26 Feb 2023 12:18:06 -0600
Subject: [PATCH] Accelerate: Remove HAS_ACCELERATE

Accelerate has been a dependency for a while, and as such we probably
shouldn't be lugging around code that assumes it isn't present.
---
 aiserver.py          | 20 +++++++-------------
 breakmodel.py        |  8 +++-----
 model.py             | 38 +++++++++++++++++---------------------
 torch_lazy_loader.py |  4 ++--
 utils.py             |  7 -------
 5 files changed, 29 insertions(+), 48 deletions(-)
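For orientation before the per-file hunks: the flag being removed was computed with the usual optional-dependency guard (see the utils.py hunk at the end of this patch). A minimal before/after sketch; the requirements pin that makes the plain import safe is assumed and is not part of this patch:

    # Before (per the removed utils.py lines): accelerate support was gated on a
    # new-enough transformers release and on the import actually succeeding.
    import packaging.version
    from transformers import __version__ as transformers_version

    HAS_ACCELERATE = packaging.version.parse(transformers_version) >= packaging.version.parse("4.20.0.dev0")
    try:
        import accelerate
    except ImportError:
        HAS_ACCELERATE = False

    # After: accelerate is a hard dependency, so a plain import suffices and every
    # "if HAS_ACCELERATE:" branch collapses into the accelerate code path.
    import accelerate
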
diff --git a/aiserver.py b/aiserver.py
index 2f83eba9..05b5d0cc 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1458,8 +1458,6 @@ def get_model_info(model, directory=""):
         pass
     #elif model == 'customhuggingface':
     #    show_custom_model_box = True
-    elif not utils.HAS_ACCELERATE and not torch.cuda.is_available():
-        pass
     elif args.cpu:
         pass
     else:
@@ -1486,13 +1484,13 @@ def get_model_info(model, directory=""):
             break_values += [0] * (gpu_count - len(break_values))
         emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url, 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
-                             'disk_break_value': disk_blocks, 'accelerate': utils.HAS_ACCELERATE,
+                             'disk_break_value': disk_blocks, 'accelerate': True,
                              'break_values': break_values, 'gpu_count': gpu_count,
                              'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_custom_model_box': show_custom_model_box}, broadcast=True, room="UI_1")
         emit('selected_model_info', {'key_value': key_value, 'key':key,
                              'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url,
-                             'disk_break_value': disk_blocks, 'disk_break': utils.HAS_ACCELERATE,
+                             'disk_break_value': disk_blocks, 'disk_break': True,
                              'break_values': break_values, 'gpu_count': gpu_count,
                              'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select,
                              'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False,
@@ -1525,7 +1523,7 @@ def get_layer_count(model, directory=""):
         else:
             model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache")
         try:
-            if ((utils.HAS_ACCELERATE and model_config.model_type != 'gpt2') or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel:
+            if (model_config.model_type != 'gpt2' or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel:
                 return utils.num_layers(model_config)
             else:
                 return None
@@ -1819,12 +1817,12 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         # loadsettings()
     logger.init("GPU support", status="Searching")
     koboldai_vars.hascuda = torch.cuda.is_available() and not args.cpu
-    koboldai_vars.bmsupported = ((utils.HAS_ACCELERATE and koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel
+    koboldai_vars.bmsupported = ((koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel
     if(args.breakmodel is not None and args.breakmodel):
         logger.warning("--breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).")
     if(args.breakmodel_layers is not None):
         logger.warning("--breakmodel_layers is deprecated. Use --breakmodel_gpulayers instead (see --help for details).")
-    if(args.model and koboldai_vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers and (not utils.HAS_ACCELERATE or not args.breakmodel_disklayers)):
+    if(args.model and koboldai_vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers and (not args.breakmodel_disklayers)):
         logger.warning("Model launched without the --breakmodel_gpulayers argument, defaulting to GPU only mode.")
         koboldai_vars.bmsupported = False
     if(not koboldai_vars.bmsupported and (args.breakmodel_gpulayers is not None or args.breakmodel_layers is not None or args.breakmodel_disklayers is not None)):
@@ -2206,7 +2204,7 @@ def lua_decode(tokens):
         from transformers import GPT2Tokenizer
         global tokenizer
         tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
-    return utils.decodenewlines(tokenizer.decode(tokens))
+    return utils.decodenewlines(model.tokenizer.decode(tokens))

#==================================================================#
#  Encode string into list of token IDs using current tokenizer
@@ -3053,8 +3051,6 @@ def get_message(msg):
        if not os.path.exists("settings/"):
            os.mkdir("settings")
        changed = True
-       if not utils.HAS_ACCELERATE:
-           msg['disk_layers'] = "0"
        if os.path.exists("settings/" + koboldai_vars.model_selected.replace('/', '_') + ".breakmodel"):
            with open("settings/" + koboldai_vars.model_selected.replace('/', '_') + ".breakmodel", "r") as file:
                data = file.read().split('\n')[:2]
@@ -3995,7 +3991,7 @@ def generate(txt, minimum, maximum, found_entries=None):

    if not koboldai_vars.quiet:
        logger.debug(f"Prompt Min:{minimum}, Max:{maximum}")
-       logger.prompt(utils.decodenewlines(tokenizer.decode(txt)).encode("unicode_escape").decode("utf-8"))
+       logger.prompt(utils.decodenewlines(model.tokenizer.decode(txt)).encode("unicode_escape").decode("utf-8"))

    # Store context in memory to use it for comparison with generated content
    koboldai_vars.lastctx = utils.decodenewlines(tokenizer.decode(txt))
@@ -6384,8 +6380,6 @@ def UI_2_load_model(data):
        if not os.path.exists("settings/"):
            os.mkdir("settings")
        changed = True
-       if not utils.HAS_ACCELERATE:
-           data['disk_layers'] = "0"
        if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"):
            with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file:
                file_data = file.read().split('\n')[:2]
diff --git a/breakmodel.py b/breakmodel.py
index 52000335..75bc03cc 100644
--- a/breakmodel.py
+++ b/breakmodel.py
@@ -235,11 +235,9 @@
 gpu_blocks = []
 disk_blocks = 0
 primary_device = 0 if torch.cuda.device_count() > 0 else "cpu"
-
-if utils.HAS_ACCELERATE:
-    from accelerate.hooks import attach_align_device_hook_on_blocks
-    from accelerate.utils import OffloadedWeightsLoader, check_device_map, extract_submodules_state_dict, offload_state_dict
-    from accelerate import dispatch_model
+from accelerate.hooks import attach_align_device_hook_on_blocks
+from accelerate.utils import OffloadedWeightsLoader, check_device_map, extract_submodules_state_dict, offload_state_dict
+from accelerate import dispatch_model

 def dispatch_model_ex(
     model: nn.Module,
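breakmodel.py now pulls in accelerate's dispatch machinery unconditionally; the dispatch_model_ex defined just below the changed imports builds on accelerate.dispatch_model. For readers unfamiliar with that machinery, a rough sketch of how plain accelerate (not KoboldAI's extended version) is typically used to split a model across devices; the model name and memory budget are only examples:

    from transformers import AutoModelForCausalLM
    from accelerate import infer_auto_device_map, dispatch_model

    model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125m")
    # Plan a placement of submodules across the available devices within a memory
    # budget, then attach hooks that move weights/inputs to the right device at
    # forward time.
    device_map = infer_auto_device_map(model, max_memory={0: "4GiB", "cpu": "8GiB"})
    model = dispatch_model(model, device_map=device_map)
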
diff --git a/model.py b/model.py
index 510c1497..12529aa2 100644
--- a/model.py
+++ b/model.py
@@ -1779,18 +1779,17 @@ class HFTorchInferenceModel(InferenceModel):
         if utils.num_shards is None or utils.current_shard == 0:
             utils.offload_index = {}
-            if utils.HAS_ACCELERATE:
-                if os.path.isdir("accelerate-disk-cache"):
-                    # Delete all of the files in the disk cache folder without deleting the folder itself to allow people to create symbolic links for this folder
-                    # (the folder doesn't contain any subfolders so os.remove will do just fine)
-                    for filename in os.listdir("accelerate-disk-cache"):
-                        try:
-                            os.remove(
-                                os.path.join("accelerate-disk-cache", filename)
-                            )
-                        except OSError:
-                            pass
-                os.makedirs("accelerate-disk-cache", exist_ok=True)
+            if os.path.isdir("accelerate-disk-cache"):
+                # Delete all of the files in the disk cache folder without deleting the folder itself to allow people to create symbolic links for this folder
+                # (the folder doesn't contain any subfolders so os.remove will do just fine)
+                for filename in os.listdir("accelerate-disk-cache"):
+                    try:
+                        os.remove(
+                            os.path.join("accelerate-disk-cache", filename)
+                        )
+                    except OSError:
+                        pass
+            os.makedirs("accelerate-disk-cache", exist_ok=True)

         if utils.num_shards is not None:
             num_tensors = len(
                 utils.get_sharded_checkpoint_num_tensors(
@@ -1883,7 +1882,7 @@ class HFTorchInferenceModel(InferenceModel):
                         model_dict[key] = model_dict[key].to(torch.float32)
                     if device == "shared":
                         model_dict[key] = model_dict[key].to("cpu").detach_()
-                        if able_to_pin_layers and utils.HAS_ACCELERATE:
+                        if able_to_pin_layers:
                             try:
                                 model_dict[key] = model_dict[key].pin_memory()
                             except:
@@ -1987,10 +1986,9 @@ class HFTorchInferenceModel(InferenceModel):
        )
        row_color = colors.END
        sep_color = colors.YELLOW
-       if utils.HAS_ACCELERATE:
-           print(
-               f"{row_color}{colors.YELLOW + '->' + row_color if -1 == selected else ' '} {' '*9} N/A {sep_color}|{row_color} {breakmodel.disk_blocks:3} {sep_color}|{row_color} (Disk cache){colors.END}"
-           )
+       print(
+           f"{row_color}{colors.YELLOW + '->' + row_color if -1 == selected else ' '} {' '*9} N/A {sep_color}|{row_color} {breakmodel.disk_blocks:3} {sep_color}|{row_color} (Disk cache){colors.END}"
+       )
        print(
            f"{row_color} {' '*9} N/A {sep_color}|{row_color} {n_layers:3} {sep_color}|{row_color} (CPU){colors.END}"
        )
@@ -2007,9 +2005,7 @@ class HFTorchInferenceModel(InferenceModel):
            breakmodel.gpu_blocks = [0] * n_layers
            return

-       elif utils.args.breakmodel_gpulayers is not None or (
-           utils.HAS_ACCELERATE and utils.args.breakmodel_disklayers is not None
-       ):
+       elif utils.args.breakmodel_gpulayers is not None or utils.args.breakmodel_disklayers is not None:
            try:
                if not utils.args.breakmodel_gpulayers:
                    breakmodel.gpu_blocks = []
@@ -2117,7 +2113,7 @@ class HFTorchInferenceModel(InferenceModel):
                if n_layers == 0:
                    break

-       if utils.HAS_ACCELERATE and n_layers > 0:
+       if n_layers > 0:
            self.breakmodel_device_list(
                n_layers, primary=breakmodel.primary_device, selected=-1
            )
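On the pin_memory hunk above: pinning is an optimization rather than a requirement, which is why the loader keeps the try/except and the able_to_pin_layers flag. A standalone illustration of the idea (sizes and device checks are arbitrary, not project code):

    import torch

    t = torch.empty(1024, 1024)  # a CPU tensor about to be copied to the GPU
    try:
        # Page-locked (pinned) host memory allows asynchronous host-to-device
        # copies, so a later .to("cuda", non_blocking=True) can overlap with compute.
        t = t.pin_memory()
    except RuntimeError:
        # Pinning can fail (e.g. the OS refuses to lock more pages); fall back
        # to ordinary pageable memory and a synchronous copy.
        pass
    if torch.cuda.is_available():
        t = t.to("cuda", non_blocking=True)
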
diff --git a/torch_lazy_loader.py b/torch_lazy_loader.py
index fae49e51..f87c42e0 100644
--- a/torch_lazy_loader.py
+++ b/torch_lazy_loader.py
@@ -303,7 +303,7 @@ def use_lazy_torch_load(enable=True, callback: Optional[Callable] = None, demate
     torch.load = torch_load

     if dematerialized_modules:
-        if use_accelerate_init_empty_weights and utils.HAS_ACCELERATE:
+        if use_accelerate_init_empty_weights:
             import accelerate
             init_empty_weights = accelerate.init_empty_weights()
             init_empty_weights.__enter__()
@@ -334,7 +334,7 @@ def use_lazy_torch_load(enable=True, callback: Optional[Callable] = None, demate
         torch._utils._rebuild_tensor = old_rebuild_tensor
         torch.load = old_torch_load
         if dematerialized_modules:
-            if use_accelerate_init_empty_weights and utils.HAS_ACCELERATE:
+            if use_accelerate_init_empty_weights:
                 init_empty_weights.__exit__(None, None, None)
             else:
                 torch.nn.Linear.__init__ = old_linear_init
diff --git a/utils.py b/utils.py
index 4a39445a..1483d4d4 100644
--- a/utils.py
+++ b/utils.py
@@ -9,7 +9,6 @@ import requests
 import requests.adapters
 import time
 import breakmodel
-from transformers import __version__ as transformers_version
 from transformers import PreTrainedModel
 import packaging.version
 from tqdm.auto import tqdm
@@ -21,12 +20,6 @@ import packaging.version
 from pathlib import Path
 from typing import List, Optional

-HAS_ACCELERATE = packaging.version.parse(transformers_version) >= packaging.version.parse("4.20.0.dev0")
-try:
-    import accelerate
-except ImportError:
-    HAS_ACCELERATE = False
-
 koboldai_vars = None
 args = None
 num_shards: Optional[int] = None
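The torch_lazy_loader.py hunks above keep using accelerate's init_empty_weights, just without the availability check. For reference, the context manager the lazy loader now relies on unconditionally behaves roughly like this (standalone example, not project code):

    import torch.nn as nn
    from accelerate import init_empty_weights

    # Inside the context manager, newly constructed modules place their parameters
    # on the "meta" device, so no real memory is allocated until actual weights
    # are loaded into them later.
    with init_empty_weights():
        layer = nn.Linear(4096, 4096)

    print(layer.weight.device)  # prints: meta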