Merge pull request #474 from ebolam/Model_Plugins
TTS update and double click to load story
This commit is contained in:
commit
974b96789c
81
aiserver.py
81
aiserver.py
|
@ -1477,7 +1477,7 @@ def general_startup(override_args=None):
|
|||
parser.add_argument("--cacheonly", action='store_true', help="Does not save the model to the models folder when it has been downloaded in the cache")
|
||||
parser.add_argument("--customsettings", help="Preloads arguements from json file. You only need to provide the location of the json file. Use customsettings.json template file. It can be renamed if you wish so that you can store multiple configurations. Leave any settings you want as default as null. Any values you wish to set need to be in double quotation marks")
|
||||
parser.add_argument("--no_ui", action='store_true', default=False, help="Disables the GUI and Socket.IO server while leaving the API server running.")
|
||||
parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6")
|
||||
parser.add_argument("--summarizer_model", action='store', default="pszemraj/led-large-book-summary", help="Huggingface model to use for summarization. Defaults to pszemraj/led-large-book-summary")
|
||||
parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation")
|
||||
parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)")
|
||||
parser.add_argument("--peft", type=str, help="Specify the path or HuggingFace ID of a Peft to load it. Not supported on TPU. (Experimental)")
|
||||
|
@ -7586,8 +7586,11 @@ def text2img_api(prompt, art_guide="") -> Image.Image:
|
|||
@socketio.on("clear_generated_image")
|
||||
@logger.catch
|
||||
def UI2_clear_generated_image(data):
|
||||
koboldai_vars.picture = ""
|
||||
koboldai_vars.picture_prompt = ""
|
||||
if 'action_id' in data and data['action_id'] is not None:
|
||||
koboldai_vars.actions.clear_picture(data['action_id'])
|
||||
else:
|
||||
koboldai_vars.picture = ""
|
||||
koboldai_vars.picture_prompt = ""
|
||||
|
||||
#==================================================================#
|
||||
# Retrieve previous images
|
||||
|
@ -7600,7 +7603,9 @@ def UI_2_get_story_image(data):
|
|||
print(filename)
|
||||
if filename is not None:
|
||||
with open(filename, "rb") as image_file:
|
||||
return base64.b64encode(image_file.read()).decode("utf-8")
|
||||
return {'img': base64.b64encode(image_file.read()).decode("utf-8"), 'action_id': action_id}
|
||||
else:
|
||||
return {'img': None, 'action_id': action_id}
|
||||
|
||||
#@logger.catch
|
||||
def get_items_locations_from_text(text):
|
||||
|
@ -7651,16 +7656,19 @@ def get_items_locations_from_text(text):
|
|||
#==================================================================#
|
||||
def summarize(text, max_length=100, min_length=30, unload=True):
|
||||
from transformers import pipeline as summary_pipeline
|
||||
from transformers import AutoConfig
|
||||
start_time = time.time()
|
||||
if koboldai_vars.summarizer is None:
|
||||
if os.path.exists("functional_models/{}".format(args.summarizer_model.replace('/', '_'))):
|
||||
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
|
||||
koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
|
||||
koboldai_vars.summary_model_config = AutoConfig.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
|
||||
else:
|
||||
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
|
||||
koboldai_vars.summarizer = AutoModelForSeq2SeqLM.from_pretrained(args.summarizer_model, cache_dir="cache")
|
||||
koboldai_vars.summary_tokenizer.save_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
|
||||
koboldai_vars.summarizer.save_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
|
||||
koboldai_vars.summary_model_config = AutoConfig.from_pretrained(args.summarizer_model, cache_dir="cache")
|
||||
|
||||
#Try GPU accel
|
||||
if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
|
||||
|
@ -7674,9 +7682,27 @@ def summarize(text, max_length=100, min_length=30, unload=True):
|
|||
#Actual summarization
|
||||
start_time = time.time()
|
||||
#make sure text is less than 1024 tokens, otherwise we'll crash
|
||||
if len(koboldai_vars.summary_tokenizer.encode(text)) > 1000:
|
||||
text = koboldai_vars.summary_tokenizer.decode(koboldai_vars.summary_tokenizer.encode(text)[:1000])
|
||||
output = tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
|
||||
max_tokens = koboldai_vars.summary_model_config.max_encoder_position_embeddings if hasattr(koboldai_vars.summary_model_config, 'max_encoder_position_embeddings') else 1024
|
||||
logger.info("Using max summary tokens of {}".format(max_tokens))
|
||||
if len(koboldai_vars.summary_tokenizer.encode(text)) > max_tokens:
|
||||
text_list = koboldai_vars.actions.sentence_re.findall(text)
|
||||
i=0
|
||||
while i <= len(text_list)-2:
|
||||
if len(koboldai_vars.summary_tokenizer.encode(text_list[i] + text_list[i+1])) < max_tokens:
|
||||
text_list[i] = text_list[i] + text_list[i+1]
|
||||
del text_list[i+1]
|
||||
else:
|
||||
i+=1
|
||||
|
||||
|
||||
else:
|
||||
text_list = [text]
|
||||
|
||||
output = []
|
||||
logger.info("Summarizing with {} chunks of length {}".format(len(text_list), [len(koboldai_vars.summary_tokenizer.encode(x)) for x in text_list]))
|
||||
for text in text_list:
|
||||
output.append(tpool.execute(summarizer, text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text'])
|
||||
output = " ".join(output)
|
||||
logger.debug("Time to summarize: {}".format(time.time()-start_time))
|
||||
#move model back to CPU to save precious vram
|
||||
torch.cuda.empty_cache()
|
||||
|
@ -7696,40 +7722,33 @@ def summarize(text, max_length=100, min_length=30, unload=True):
|
|||
@socketio.on("refresh_auto_memory")
|
||||
@logger.catch
|
||||
def UI_2_refresh_auto_memory(data):
|
||||
max_output_length=500
|
||||
from transformers import AutoConfig
|
||||
koboldai_vars.auto_memory = "Generating..."
|
||||
if koboldai_vars.summary_tokenizer is None:
|
||||
if os.path.exists("models/{}".format(args.summarizer_model.replace('/', '_'))):
|
||||
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
|
||||
if os.path.exists("functional_models/{}".format(args.summarizer_model.replace('/', '_'))):
|
||||
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
|
||||
koboldai_vars.summary_model_config = AutoConfig.from_pretrained("functional_models/{}".format(args.summarizer_model.replace('/', '_')), cache_dir="cache")
|
||||
else:
|
||||
koboldai_vars.summary_tokenizer = AutoTokenizer.from_pretrained(args.summarizer_model, cache_dir="cache")
|
||||
#first, let's get all of our game text and split it into sentences
|
||||
sentences = [x[0] for x in koboldai_vars.actions.to_sentences()]
|
||||
sentences_lengths = [len(koboldai_vars.summary_tokenizer.encode(x)) for x in sentences]
|
||||
koboldai_vars.summary_model_config = AutoConfig.from_pretrained(args.summarizer_model, cache_dir="cache")
|
||||
max_tokens = koboldai_vars.summary_model_config.max_encoder_position_embeddings if hasattr(koboldai_vars.summary_model_config, 'max_encoder_position_embeddings') else 1024
|
||||
|
||||
#first, let's get all of our game text
|
||||
sentences = "".join([x[0] for x in koboldai_vars.actions.to_sentences()])
|
||||
|
||||
pass_number = 1
|
||||
while len(koboldai_vars.summary_tokenizer.encode("".join(sentences))) > 1000:
|
||||
#Now let's split them into 1000 token chunks
|
||||
summary_chunks = [""]
|
||||
summary_chunk_lengths = [0]
|
||||
for i in range(len(sentences)):
|
||||
if summary_chunk_lengths[-1] + sentences_lengths[i] <= 1000:
|
||||
summary_chunks[-1] += sentences[i]
|
||||
summary_chunk_lengths[-1] += sentences_lengths[i]
|
||||
else:
|
||||
summary_chunks.append(sentences[i])
|
||||
summary_chunk_lengths.append(sentences_lengths[i])
|
||||
new_sentences = []
|
||||
i=0
|
||||
for summary_chunk in summary_chunks:
|
||||
logger.debug("summarizing chunk {}".format(i))
|
||||
new_sentences.extend(re.split("(?<=[.!?])\s+", summarize(summary_chunk, unload=False)))
|
||||
i+=1
|
||||
while len(koboldai_vars.summary_tokenizer.encode(sentences)) > max_tokens:
|
||||
new_sentences = summarize(sentences, unload=False, max_length=max_output_length)
|
||||
logger.debug("Pass {}:\nSummarized to {} sentencees from {}".format(pass_number, len(new_sentences), len(sentences)))
|
||||
sentences = new_sentences
|
||||
koboldai_vars.auto_memory += "Pass {}:\n{}\n\n".format(pass_number, "\n".join(sentences))
|
||||
koboldai_vars.auto_memory += "Pass {}:\n{}\n\n".format(pass_number, sentences)
|
||||
pass_number+=1
|
||||
logger.debug("OK, doing final summarization")
|
||||
output = summarize(" ".join(sentences))
|
||||
if len(koboldai_vars.summary_tokenizer.encode(sentences)) > max_output_length:
|
||||
output = summarize(sentences, max_length=max_output_length)
|
||||
else:
|
||||
output = sentences
|
||||
koboldai_vars.auto_memory += "\n\n Final Result:\n" + output
|
||||
|
||||
|
||||
|
|
|
@ -687,8 +687,9 @@ class settings(object):
|
|||
|
||||
class model_settings(settings):
|
||||
local_only_variables = ['apikey', 'default_preset']
|
||||
no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns',
|
||||
'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset',
|
||||
no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns',
|
||||
'loaded_layers', 'total_layers', 'loaded_checkpoints', 'total_checkpoints',
|
||||
'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset',
|
||||
'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
|
||||
'badwordsids', 'uid_presets', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
|
||||
settings_name = "model"
|
||||
|
@ -705,6 +706,8 @@ class model_settings(settings):
|
|||
self.generated_tkns = 0 # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
|
||||
self.loaded_layers = 0 # Used in UI 2 to show model loading progress
|
||||
self.total_layers = 0 # Same as above
|
||||
self.loaded_checkpoints = 0
|
||||
self.total_checkpoints = 1
|
||||
self.total_download_chunks = 0 # tracks how much of the model has downloaded for the UI 2
|
||||
self.downloaded_chunks = 0 #as above
|
||||
self._tqdm = tqdm.tqdm(total=self.genamt, file=self.ignore_tqdm()) # tqdm agent for generating tokens. This will allow us to calculate the remaining time
|
||||
|
@ -829,13 +832,22 @@ class model_settings(settings):
|
|||
#Set up tqdm for model loading
|
||||
elif name == "loaded_layers" and '_tqdm' in self.__dict__:
|
||||
if value == 0:
|
||||
self._tqdm.reset(total=self.total_layers)
|
||||
self._tqdm.reset(total=self.total_layers if self.total_checkpoints == 1 else 1000)
|
||||
self.tqdm_progress = 0
|
||||
else:
|
||||
self._tqdm.update(1)
|
||||
self.tqdm_progress = int(float(self.loaded_layers)/float(self.total_layers)*100)
|
||||
if self.total_checkpoints == 1:
|
||||
self._tqdm.update(1)
|
||||
elif self.total_layers != 0 and self.total_checkpoints != 0:
|
||||
proper_progress = (self.loaded_checkpoints + value/self.total_layers)/self.total_checkpoints*1000
|
||||
self._tqdm.update(proper_progress - self._tqdm.n)
|
||||
|
||||
self.tqdm_progress = int(float(self._tqdm.n)/float(self._tqdm.total)*100)
|
||||
|
||||
if self._tqdm.format_dict['rate'] is not None:
|
||||
self.tqdm_rem_time = str(datetime.timedelta(seconds=int(float(self.total_layers-self.loaded_layers)/self._tqdm.format_dict['rate'])))
|
||||
elapsed = self._tqdm.format_dict["elapsed"]
|
||||
rate = self._tqdm.format_dict["rate"]
|
||||
remaining = (self._tqdm.total - self._tqdm.n) / rate if rate and self._tqdm.total else 0
|
||||
self.tqdm_rem_time = str(datetime.timedelta(seconds=remaining))
|
||||
#Set up tqdm for model downloading
|
||||
elif name == "total_download_chunks" and '_tqdm' in self.__dict__:
|
||||
self._tqdm.reset(total=value)
|
||||
|
@ -1247,11 +1259,11 @@ class undefined_settings(settings):
|
|||
class system_settings(settings):
|
||||
local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
|
||||
'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui',
|
||||
'sp', '_horde_pid', 'inference_config', 'image_pipeline',
|
||||
'sp', '_horde_pid', 'inference_config', 'image_pipeline', 'summary_model_config',
|
||||
'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'colab_arg']
|
||||
no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
|
||||
no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'summary_model_config',
|
||||
'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy',
|
||||
'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab'
|
||||
'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab', 'quiet',
|
||||
'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model',
|
||||
'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch', 'colab_arg',
|
||||
'disable_model_load']
|
||||
|
@ -1335,6 +1347,7 @@ class system_settings(settings):
|
|||
self.image_pipeline = None
|
||||
self.summarizer = None
|
||||
self.summary_tokenizer = None
|
||||
self.summary_model_config = {}
|
||||
self.keep_img_gen_in_memory = False
|
||||
self.cookies = {} #cookies for colab since colab's URL changes, cookies are lost
|
||||
self.experimental_features = False
|
||||
|
@ -2233,6 +2246,12 @@ class KoboldStoryRegister(object):
|
|||
self.actions[action_id]['picture_filename'] = filename
|
||||
self.actions[action_id]['picture_prompt'] = prompt
|
||||
|
||||
def clear_picture(self, action_id):
    """Remove the stored picture data from a single action.

    Deletes the ``picture_filename`` and ``picture_prompt`` entries of the
    action identified by *action_id*. Ids that are not present in
    ``self.actions`` are ignored, and so are actions that have no picture
    attached (either key may be missing).

    Args:
        action_id: Id of the action. It is converted with ``int()``, so a
            numeric string is accepted as well.

    Raises:
        ValueError: If *action_id* cannot be converted to an integer.
    """
    action_id = int(action_id)
    if action_id in self.actions:
        action = self.actions[action_id]
        # pop() with a default instead of `del`: an action that never had a
        # picture (or only has one of the two keys) must not raise KeyError,
        # and a missing filename must not prevent the prompt being cleared.
        action.pop('picture_filename', None)
        action.pop('picture_prompt', None)
|
||||
|
||||
def get_picture(self, action_id):
|
||||
if action_id == -1:
|
||||
if self.story_settings.prompt_picture_filename == "":
|
||||
|
|
|
@ -340,7 +340,8 @@ class model_backend(InferenceModel):
|
|||
|
||||
self._post_token_gen(self.generator.sequence)
|
||||
|
||||
utils.koboldai_vars.generated_tkns += 1
|
||||
#This is taken care of in the core stopper class that's called below. If you're not using core stoppers then it should remain here
|
||||
#utils.koboldai_vars.generated_tkns += 1
|
||||
|
||||
# Apply stoppers
|
||||
do_stop = False
|
||||
|
@ -374,6 +375,12 @@ class model_backend(InferenceModel):
|
|||
return tokenizer
|
||||
|
||||
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
|
||||
saved_data = {'layers': [], 'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}
|
||||
if os.path.exists("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
|
||||
with open("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
|
||||
temp = json.load(f)
|
||||
for key in temp:
|
||||
saved_data[key] = temp[key]
|
||||
requested_parameters = []
|
||||
gpu_count = torch.cuda.device_count()
|
||||
layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
|
||||
|
@ -400,7 +407,7 @@ class model_backend(InferenceModel):
|
|||
"step": 1,
|
||||
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)], "value": layer_count, 'check': "="},
|
||||
"check_message": "The sum of assigned layers must equal {}".format(layer_count),
|
||||
"default": [layer_count if i == 0 else 0],
|
||||
"default": saved_data['layers'][i] if len(saved_data['layers']) > i else layer_count if i==0 else 0,
|
||||
"tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)),
|
||||
"menu_path": "Layers",
|
||||
"extra_classes": "",
|
||||
|
@ -415,7 +422,7 @@ class model_backend(InferenceModel):
|
|||
"min": 2048,
|
||||
"max": 16384,
|
||||
"step": 512,
|
||||
"default": 2048,
|
||||
"default": saved_data['max_ctx'],
|
||||
"tooltip": "The maximum context size the model supports",
|
||||
"menu_path": "Configuration",
|
||||
"extra_classes": "",
|
||||
|
@ -430,7 +437,7 @@ class model_backend(InferenceModel):
|
|||
"min": 1,
|
||||
"max": 8,
|
||||
"step": 0.25,
|
||||
"default": 1,
|
||||
"default": saved_data['compress_emb'],
|
||||
"tooltip": "If the model requires compressed embeddings, set them here",
|
||||
"menu_path": "Configuration",
|
||||
"extra_classes": "",
|
||||
|
@ -445,7 +452,7 @@ class model_backend(InferenceModel):
|
|||
"min": 1,
|
||||
"max": 32,
|
||||
"step": 0.25,
|
||||
"default": 1,
|
||||
"default": saved_data['ntk_alpha'],
|
||||
"tooltip": "NTK alpha value",
|
||||
"menu_path": "Configuration",
|
||||
"extra_classes": "",
|
||||
|
@ -490,3 +497,21 @@ class model_backend(InferenceModel):
|
|||
|
||||
self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
|
||||
self.path = parameters['path'] if 'path' in parameters else None
|
||||
|
||||
def _save_settings(self):
    """Write this backend's load settings to its per-model settings file.

    The file lives under ``settings/`` and is named after ``self.model_name``
    with every ``/`` replaced by ``_``. It stores the layer split (an empty
    list when the instance has no ``layers`` attribute) together with the
    max sequence length, positional-embedding compression and alpha value
    read from ``self.model_config``.
    """
    safe_model_name = self.model_name.replace("/", "_")
    settings_path = "settings/{}.exllama.model_backend.settings".format(safe_model_name)
    with open(settings_path, "w") as f:
        # The payload is assembled after the file is opened, matching the
        # original statement order.
        if "layers" in vars(self):
            layers = self.layers
        else:
            layers = []
        payload = {
            "layers": layers,
            "max_ctx": self.model_config.max_seq_len,
            "compress_emb": self.model_config.compress_pos_emb,
            "ntk_alpha": self.model_config.alpha_value
        }
        json.dump(payload, f, indent="")
|
|
@ -314,7 +314,8 @@ class model_backend(InferenceModel):
|
|||
|
||||
self._post_token_gen(self.generator.sequence_ids)
|
||||
|
||||
utils.koboldai_vars.generated_tkns += 1
|
||||
#utils.koboldai_vars.generated_tkns += 1
|
||||
|
||||
|
||||
# Apply stoppers
|
||||
do_stop = False
|
||||
|
@ -349,6 +350,12 @@ class model_backend(InferenceModel):
|
|||
return tokenizer
|
||||
|
||||
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
|
||||
saved_data = {'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}
|
||||
if os.path.exists("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
|
||||
with open("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
|
||||
temp = json.load(f)
|
||||
for key in temp:
|
||||
saved_data[key] = temp[key]
|
||||
requested_parameters = []
|
||||
gpu_count = torch.cuda.device_count()
|
||||
layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
|
||||
|
@ -361,7 +368,7 @@ class model_backend(InferenceModel):
|
|||
"min": 2048,
|
||||
"max": 16384,
|
||||
"step": 512,
|
||||
"default": 2048,
|
||||
"default": saved_data['max_ctx'],
|
||||
"tooltip": "The maximum context size the model supports",
|
||||
"menu_path": "Configuration",
|
||||
"extra_classes": "",
|
||||
|
@ -376,7 +383,7 @@ class model_backend(InferenceModel):
|
|||
"min": 1,
|
||||
"max": 8,
|
||||
"step": 0.25,
|
||||
"default": 1,
|
||||
"default": saved_data['compress_emb'],
|
||||
"tooltip": "If the model requires compressed embeddings, set them here",
|
||||
"menu_path": "Configuration",
|
||||
"extra_classes": "",
|
||||
|
@ -391,7 +398,7 @@ class model_backend(InferenceModel):
|
|||
"min": 1,
|
||||
"max": 32,
|
||||
"step": 0.25,
|
||||
"default": 1,
|
||||
"default": saved_data['ntk_alpha'],
|
||||
"tooltip": "NTK alpha value",
|
||||
"menu_path": "Configuration",
|
||||
"extra_classes": "",
|
||||
|
@ -418,4 +425,21 @@ class model_backend(InferenceModel):
|
|||
self.model_config.sdp_thd = 0
|
||||
|
||||
self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
|
||||
self.path = parameters['path'] if 'path' in parameters else None
|
||||
self.path = parameters['path'] if 'path' in parameters else None
|
||||
|
||||
def _save_settings(self):
    """Write this backend's load settings to its per-model settings file.

    The file lives under ``settings/`` and is named after ``self.model_name``
    with every ``/`` replaced by ``_``. It stores the max sequence length,
    positional-embedding compression and alpha value read from
    ``self.model_config``.
    """
    safe_model_name = self.model_name.replace("/", "_")
    settings_path = "settings/{}.exllamav2.model_backend.settings".format(safe_model_name)
    with open(settings_path, "w") as f:
        # The payload is assembled after the file is opened, matching the
        # original statement order.
        payload = {
            "max_ctx": self.model_config.max_seq_len,
            "compress_emb": self.model_config.compress_pos_emb,
            "ntk_alpha": self.model_config.alpha_value
        }
        json.dump(payload, f, indent="")
|
|
@ -52,7 +52,8 @@ import zipfile
|
|||
import pickle
|
||||
import torch
|
||||
import os
|
||||
from typing import Any, Callable, Dict, Optional, Tuple, Type
|
||||
import json
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Type
|
||||
|
||||
from torch.nn import Module
|
||||
from torch.storage import UntypedStorage
|
||||
|
@ -398,6 +399,18 @@ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, miss
|
|||
if input_name not in self._modules and input_name not in local_state:
|
||||
unexpected_keys.append(key)
|
||||
|
||||
def get_sharded_torch_checkpoints(dir: str) -> List[str]:
    """Return the shard file names listed in a sharded checkpoint index.

    Reads ``pytorch_model.bin.index.json`` inside *dir* and collects the
    distinct values of its ``"weight_map"`` mapping.

    Args:
        dir: Directory expected to contain the index file. The parameter
            name shadows the builtin but is kept so existing keyword
            callers continue to work.

    Returns:
        The unique shard file names in sorted order, or an empty list when
        the index file is missing, cannot be parsed as JSON, or does not
        contain a usable ``"weight_map"`` mapping.
    """
    index_path = os.path.join(dir, "pytorch_model.bin.index.json")
    try:
        with open(index_path, encoding="utf-8") as file:
            index = json.load(file)
    except (FileNotFoundError, NotADirectoryError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError: a
        # corrupt index is treated like a missing one so the caller can fall
        # back to assuming a single checkpoint.
        return []

    try:
        # sorted() instead of list(): set iteration order is arbitrary, and a
        # stable order makes the result reproducible between runs.
        return sorted(set(index["weight_map"].values()))
    except (KeyError, AttributeError, TypeError):
        # No "weight_map" key, or the index / weight map is not shaped like
        # the expected mapping.
        return []
|
||||
|
||||
@contextlib.contextmanager
|
||||
def use_lazy_load(
|
||||
enable=True,
|
||||
|
@ -410,6 +423,8 @@ def use_lazy_load(
|
|||
return
|
||||
|
||||
begin_time = time.time()
|
||||
utils.koboldai_vars.total_checkpoints = 0
|
||||
utils.koboldai_vars.loaded_checkpoints = 0
|
||||
|
||||
try:
|
||||
LazyloadPatches.__enter__()
|
||||
|
@ -421,6 +436,14 @@ def use_lazy_load(
|
|||
old_torch_load = torch.load
|
||||
|
||||
def torch_load(f, map_location=None, pickle_module=pickle, **pickle_load_args):
|
||||
if not utils.koboldai_vars.total_checkpoints:
|
||||
checkpoints = get_sharded_torch_checkpoints(os.path.dirname(f))
|
||||
# `checkpoints` may be empty if there is an error--return 1 in
|
||||
# this case. Either there was no checkpoint index file (most
|
||||
# common case), or there was a compatibility issue while reading
|
||||
# it.
|
||||
utils.koboldai_vars.total_checkpoints = len(checkpoints) or 1
|
||||
|
||||
model_dict = old_torch_load(
|
||||
f=f,
|
||||
map_location=map_location,
|
||||
|
|
|
@ -220,6 +220,7 @@ class LazyloadPatches:
|
|||
# BEGIN PATCH
|
||||
utils.bar = tqdm(total=len(state_dict), desc="Loading model tensors", file=utils.UIProgressBarFile(), position=1)
|
||||
utils.koboldai_vars.total_layers = len(state_dict)
|
||||
utils.koboldai_vars.loaded_layers = 0
|
||||
|
||||
for param_name, param in sorted(
|
||||
state_dict.items(),
|
||||
|
@ -325,6 +326,7 @@ class LazyloadPatches:
|
|||
fp16_statistics=fp16_statistics,
|
||||
)
|
||||
|
||||
utils.koboldai_vars.loaded_checkpoints += 1
|
||||
return error_msgs, offload_index, state_dict_index
|
||||
|
||||
|
||||
|
|
|
@ -2008,11 +2008,12 @@ function selected_model_info(sent_data) {
|
|||
slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;};
|
||||
|
||||
var slider = new_setting.querySelector('#blank_model_settings_slider');
|
||||
slider.value = item['default'];
|
||||
slider.min = item['min'];
|
||||
slider.max = item['max'];
|
||||
slider.step = item["step"];
|
||||
slider.setAttribute("data_type", item['unit']);
|
||||
slider.id = loader + "|" + item['id'] + "_value";
|
||||
slider.value = item['default'];
|
||||
if ('check' in item) {
|
||||
slider.check_data = item['check'];
|
||||
slider_number.check_data = item['check'];
|
||||
|
@ -3602,6 +3603,7 @@ function stream_tokens(tokens) {
|
|||
|
||||
streamBuffer.textContent += streaming.buffer[0];
|
||||
streaming.buffer = streaming.buffer.slice(1);
|
||||
streamBuffer.scrollIntoView({ block: "end" });
|
||||
}
|
||||
|
||||
streaming.typeyTimeout = setTimeout(_char, 10);
|
||||
|
@ -3621,6 +3623,7 @@ function stream_tokens(tokens) {
|
|||
streaming.buffer += tokens[0];
|
||||
} else {
|
||||
streamBuffer.textContent += tokens[0];
|
||||
streamBuffer.scrollIntoView({ block: "end" });
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3874,12 +3877,13 @@ function change_image(data) {
|
|||
|
||||
$el("#image-loading").classList.add("hidden");
|
||||
|
||||
if (data != undefined) {
|
||||
if (data.img != undefined) {
|
||||
var image = new Image();
|
||||
image.src = 'data:image/png;base64,'+data;
|
||||
image.src = 'data:image/png;base64,'+data.img;
|
||||
image.classList.add("action_image");
|
||||
image.setAttribute("context-menu", "generated-image");
|
||||
image.addEventListener("click", imgGenView);
|
||||
image.setAttribute('action_id', data.action_id);
|
||||
image_area.appendChild(image);
|
||||
}
|
||||
}
|
||||
|
@ -7395,11 +7399,12 @@ function imgGenDownload() {
|
|||
function imgGenClear() {
|
||||
const image = $el(".action_image");
|
||||
if (!image) return;
|
||||
action_id = image.getAttribute('action_id');
|
||||
image.remove();
|
||||
|
||||
const container = $el("#action\\ image");
|
||||
container.removeAttribute("tooltip");
|
||||
socket.emit("clear_generated_image", {});
|
||||
socket.emit("clear_generated_image", {'action_id': action_id});
|
||||
}
|
||||
|
||||
function imgGenRetry() {
|
||||
|
|
Loading…
Reference in New Issue