ExLLaMA fixes

Henk
2023-09-10 18:19:48 +02:00
parent 036db07dfb
commit 533d457678
2 changed files with 8 additions and 7 deletions

View File

@@ -1737,10 +1737,6 @@ def load_model(model_backend, initial_load=False):
     #Have to add a sleep so the server will send the emit for some reason
     time.sleep(0.1)
-    if 'model' in globals():
-        model.unload()
     # If transformers model was selected & GPU available, ask to use CPU or GPU
     if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
         # loadmodelsettings()
@@ -6337,6 +6333,9 @@ def UI_2_resubmit_model_info(data):
 @socketio.on('load_model')
 @logger.catch
 def UI_2_load_model(data):
+    logger.debug("Unloading previous model")
+    if 'model' in globals():
+        model.unload()
     logger.debug("Loading model with user input of: {}".format(data))
     model_backends[data['plugin']].set_input_parameters(data)
     load_model(data['plugin'])
@@ -8568,6 +8567,8 @@ def put_model(body: ModelSelectionSchema):
     backend = "Huggingface"
     try:
+        if 'model' in globals():
+            model.unload()
         load_model(backend)
     except Exception as e:
         koboldai_vars.model = old_model
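
The three hunks above are one fix: the model.unload() call moves out of load_model() and into its callers, so the previous model is torn down exactly once, right before a new load is requested. A minimal runnable sketch of that caller-side pattern follows; the backend class, registry, and names are illustrative stand-ins, not the real KoboldAI objects.

# Sketch of the caller-side unload pattern this commit adopts.
# DummyBackend and model_backends are illustrative stand-ins.

class DummyBackend:
    def set_input_parameters(self, data):
        print("configured with", data)

    def unload(self):
        # Drop references so the old weights can be garbage collected.
        print("unloaded previous model")

model_backends = {"ExLlama": DummyBackend()}

def load_model(plugin):
    global model
    model = model_backends[plugin]
    print("loaded", plugin)

def ui_load_model(data):
    # The unload now happens in the caller, not inside load_model().
    if 'model' in globals():
        model.unload()
    model_backends[data['plugin']].set_input_parameters(data)
    load_model(data['plugin'])

ui_load_model({"plugin": "ExLlama"})
ui_load_model({"plugin": "ExLlama"})  # second call unloads the first model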

View File

@@ -221,7 +221,7 @@ class model_backend(InferenceModel):
self.tokenizer._koboldai_header = self.tokenizer.encode("") self.tokenizer._koboldai_header = self.tokenizer.encode("")
def unload(self): def unload(self):
self.model_config = None #self.model_config = None # This breaks more than it fixes - Henk
self.model = None self.model = None
self.tokenizer = None self.tokenizer = None
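
With this hunk, the ExLLaMA backend's unload() keeps self.model_config; per Henk's comment, clearing it broke more than it fixed (a subsequent load needs the config). A hedged sketch of the resulting behaviour; the class shape and config contents are illustrative, not the actual backend.

# unload() drops the heavy objects but keeps the cheap config so the
# backend can be loaded again without rebuilding it. Illustrative only.

class ExLlamaBackendSketch:
    def __init__(self, model_config):
        self.model_config = model_config  # kept across unload/reload
        self.model = object()             # stand-in for loaded weights
        self.tokenizer = object()         # stand-in for the tokenizer

    def unload(self):
        # self.model_config = None  # cleared before this commit; keeping
        # it avoids breaking the next load
        self.model = None
        self.tokenizer = None

backend = ExLlamaBackendSketch({"max_seq_len": 2048})
backend.unload()
assert backend.model is None and backend.model_config is not None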
@@ -289,7 +289,7 @@ class model_backend(InferenceModel):
torch.manual_seed(seed) torch.manual_seed(seed)
bad_words_ids = [self.tokenizer.bos_token_id] bad_words_ids = [self.tokenizer.bos_token_id]
if utils.koboldai_vars.use_default_badwordids: if utils.koboldai_vars.use_default_badwordsids:
bad_words_ids.append(self.tokenizer.eos_token_id) bad_words_ids.append(self.tokenizer.eos_token_id)
bad_words_ids.extend(self.bracket_tokens) bad_words_ids.extend(self.bracket_tokens)
if single_line: if single_line:
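
The last hunk corrects a misspelled attribute name (use_default_badwordids to use_default_badwordsids) so the lookup matches the real setting and the default banned tokens are appended again. A self-contained sketch of how that flat id list is assembled; the tokenizer, settings object, and bracket token ids below are stand-ins for the real KoboldAI ones.

# Assembling the flat list of banned token ids, as the backend does.
# FakeTokenizer, FakeVars, and the bracket ids are illustrative stand-ins.

class FakeTokenizer:
    bos_token_id = 1
    eos_token_id = 2

class FakeVars:
    use_default_badwordsids = True  # note the corrected attribute name

tokenizer = FakeTokenizer()
koboldai_vars = FakeVars()
bracket_tokens = [60, 62]  # illustrative ids for "[" and "]"

bad_words_ids = [tokenizer.bos_token_id]
if koboldai_vars.use_default_badwordsids:
    bad_words_ids.append(tokenizer.eos_token_id)
    bad_words_ids.extend(bracket_tokens)

print(bad_words_ids)  # -> [1, 2, 60, 62]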