diff --git a/aiserver.py b/aiserver.py index b77455df..b656f646 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1735,11 +1735,7 @@ def load_model(model_backend, initial_load=False): if koboldai_vars.model != 'ReadOnly': emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(model_backends[model_backend].model_name if "model_name" in vars(model_backends[model_backend]) else model_backends[model_backend].id)}, broadcast=True) #Have to add a sleep so the server will send the emit for some reason - time.sleep(0.1) - - if 'model' in globals(): - model.unload() - + time.sleep(0.1) # If transformers model was selected & GPU available, ask to use CPU or GPU if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): @@ -6337,6 +6333,9 @@ def UI_2_resubmit_model_info(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): + logger.debug("Unloading previous model") + if 'model' in globals(): + model.unload() logger.debug("Loading model with user input of: {}".format(data)) model_backends[data['plugin']].set_input_parameters(data) load_model(data['plugin']) @@ -8568,6 +8567,8 @@ def put_model(body: ModelSelectionSchema): backend = "Huggingface" try: + if 'model' in globals(): + model.unload() load_model(backend) except Exception as e: koboldai_vars.model = old_model diff --git a/modeling/inference_models/exllama/class.py b/modeling/inference_models/exllama/class.py index a93d3dee..33d2fcc2 100644 --- a/modeling/inference_models/exllama/class.py +++ b/modeling/inference_models/exllama/class.py @@ -221,7 +221,7 @@ class model_backend(InferenceModel): self.tokenizer._koboldai_header = self.tokenizer.encode("") def unload(self): - self.model_config = None + #self.model_config = None # This breaks more than it fixes - Henk self.model = None self.tokenizer = None @@ -289,7 +289,7 @@ class model_backend(InferenceModel): torch.manual_seed(seed) bad_words_ids = [self.tokenizer.bos_token_id] - if utils.koboldai_vars.use_default_badwordids: + if utils.koboldai_vars.use_default_badwordsids: bad_words_ids.append(self.tokenizer.eos_token_id) bad_words_ids.extend(self.bracket_tokens) if single_line: