Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
ExLLaMA fixes
@@ -1737,10 +1737,6 @@ def load_model(model_backend, initial_load=False):
     #Have to add a sleep so the server will send the emit for some reason
     time.sleep(0.1)
 
-    if 'model' in globals():
-        model.unload()
-
-
     # If transformers model was selected & GPU available, ask to use CPU or GPU
     if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
         # loadmodelsettings()
@@ -6337,6 +6333,9 @@ def UI_2_resubmit_model_info(data):
 @socketio.on('load_model')
 @logger.catch
 def UI_2_load_model(data):
+    logger.debug("Unloading previous model")
+    if 'model' in globals():
+        model.unload()
     logger.debug("Loading model with user input of: {}".format(data))
     model_backends[data['plugin']].set_input_parameters(data)
     load_model(data['plugin'])
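This hunk is the other half of the first one: the unload moves out of load_model() and into the UI handler, so the previous backend is freed before the new one is constructed. The globals() check is needed because model is created lazily and does not exist on the very first load. A minimal sketch of the same unload-before-reload pattern, with FakeBackend and build_model as hypothetical stand-ins rather than KoboldAI APIs:

# Minimal sketch of the unload-before-reload pattern above.
# FakeBackend and build_model are hypothetical stand-ins, not KoboldAI APIs.

class FakeBackend:
    def __init__(self, name):
        self.name = name

    def unload(self):
        # Drop references so the old weights become collectable.
        self.name = None

def build_model(name):
    return FakeBackend(name)

def load_model(name):
    global model
    # 'model' is created lazily, so it may not exist on the first call.
    if 'model' in globals():
        model.unload()
    model = build_model(name)

load_model("first")
load_model("second")   # the first backend is unloaded before this one loads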
@@ -8568,6 +8567,8 @@ def put_model(body: ModelSelectionSchema):
     backend = "Huggingface"
 
     try:
+        if 'model' in globals():
+            model.unload()
         load_model(backend)
     except Exception as e:
         koboldai_vars.model = old_model
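Here the unload sits inside the try block, so a failure anywhere during the switch still rolls koboldai_vars.model back to the previous selection. A minimal sketch of that rollback pattern, with load_model as a hypothetical loader that may raise:

def load_model(name):
    # Hypothetical loader that fails for one name, to exercise the rollback.
    if name == "broken":
        raise RuntimeError("load failed")

def switch_model(state, new_model):
    old_model = state["model"]      # remember the current selection
    state["model"] = new_model
    try:
        load_model(new_model)
    except Exception:
        state["model"] = old_model  # roll back to the previous selection

state = {"model": "gpt2"}
switch_model(state, "broken")
assert state["model"] == "gpt2"    # the failed switch kept the old model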
@@ -221,7 +221,7 @@ class model_backend(InferenceModel):
         self.tokenizer._koboldai_header = self.tokenizer.encode("")
 
     def unload(self):
-        self.model_config = None
+        #self.model_config = None # This breaks more than it fixes - Henk
 
         self.model = None
         self.tokenizer = None
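unload() now keeps model_config, so reloading the same backend still sees its previous configuration; only the heavy model and tokenizer references are dropped. A hedged sketch of such an unload, where the gc.collect() and torch.cuda.empty_cache() calls are illustrative additions and not lines from this diff:

import gc
import torch

class Backend:
    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.model_config = {"max_ctx": 2048}   # kept across unloads

    def unload(self):
        # Keep model_config: resetting it broke reloads ("This breaks more
        # than it fixes - Henk"). Only drop the heavy objects.
        self.model = None
        self.tokenizer = None
        # Illustrative cleanup, not part of the diff: reclaim memory now
        # rather than waiting for the garbage collector.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()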
@@ -289,7 +289,7 @@ class model_backend(InferenceModel):
             torch.manual_seed(seed)
 
         bad_words_ids = [self.tokenizer.bos_token_id]
-        if utils.koboldai_vars.use_default_badwordids:
+        if utils.koboldai_vars.use_default_badwordsids:
             bad_words_ids.append(self.tokenizer.eos_token_id)
             bad_words_ids.extend(self.bracket_tokens)
         if single_line:
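The last hunk fixes a typo in the setting name (use_default_badwordids became use_default_badwordsids), so the check now reads the setting that actually exists. Conceptually, the collected ids keep the sampler from ever emitting those tokens; a minimal sketch of that masking, assuming a flat list of single-token ids and a plain logits vector:

import torch

def mask_bad_words(logits, bad_words_ids):
    # Set banned token logits to -inf so softmax gives them zero probability.
    logits = logits.clone()
    logits[bad_words_ids] = float("-inf")
    return logits

logits = torch.randn(32000)      # hypothetical vocabulary size
bad_words_ids = [1, 2]           # e.g. the bos/eos ids collected above
masked = mask_bad_words(logits, bad_words_ids)
assert torch.isinf(masked[1]) and torch.isinf(masked[2])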