Merge branch 'main' into united
 aiserver.py | 14 ++++++++++++++
@@ -1792,6 +1792,7 @@ def get_layer_count(model, directory=""):
             model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=args.revision, cache_dir="cache")
         else:
             model_config = AutoConfig.from_pretrained(model, revision=args.revision, cache_dir="cache")
+            model_config = AutoConfig.from_pretrained(model, revision=args.revision, cache_dir="cache")
         try:
             if ((utils.HAS_ACCELERATE and model_config.model_type != 'gpt2') or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel:
                 return utils.num_layers(model_config)
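The hunk above only adds a second, identical AutoConfig call; the check that follows it relies on reading the layer count out of the config. As a rough illustration only (this is not the project's actual utils.num_layers, and the attribute names are assumptions based on common Hugging Face configs), a layer-count helper can probe the few names configs use for their depth:

```python
# Hedged sketch of a layer-count lookup over a Hugging Face-style config object.
# Attribute names (n_layer, num_layers, num_hidden_layers) are assumptions from
# common architectures, not taken from the KoboldAI source.
from types import SimpleNamespace

def num_layers_sketch(config):
    for attr in ("num_layers", "n_layer", "num_hidden_layers"):
        value = getattr(config, attr, None)
        if value is not None:
            return value
    return None

# Stand-in config object so the sketch runs without downloading anything:
fake_config = SimpleNamespace(model_type="gpt_neo", num_layers=24)
print(num_layers_sketch(fake_config))  # 24
```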
@@ -3129,6 +3130,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                     tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
                 except Exception as e:
                     tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+                    tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
                 try:
                     model = AutoModelForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache", **lowmem)
                 except Exception as e:
@@ -3146,6 +3148,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                     tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache")
                 except Exception as e:
                     tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+                    tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
                 try:
                     model = AutoModelForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", **lowmem)
                 except Exception as e:
@@ -3176,6 +3179,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                     tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache")
                 except Exception as e:
                     tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+                    tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
                 try:
                     model = AutoModelForCausalLM.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache", **lowmem)
                 except Exception as e:
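Each of the three hunks above adds a second, identical GPT-2 fallback line inside load_model's tokenizer loading. A minimal sketch of that fallback pattern, with model_path/revision as placeholder parameters rather than the project's globals:

```python
# Hedged sketch of the try/fallback tokenizer load these hunks touch: try the
# model's own GPT-2 tokenizer first, and fall back to the stock "gpt2"
# tokenizer if the model ships no usable tokenizer files.
from transformers import GPT2Tokenizer

def load_tokenizer_with_fallback(model_path, revision=None, cache_dir="cache"):
    try:
        return GPT2Tokenizer.from_pretrained(model_path, revision=revision, cache_dir=cache_dir)
    except Exception:
        # Fall back to the upstream gpt2 vocabulary, mirroring the diff's fallback line.
        return GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir=cache_dir)
```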
@@ -3260,6 +3264,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         else:
             from transformers import GPT2Tokenizer
             tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+            tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
     else:
         from transformers import PreTrainedModel
         from transformers import modeling_utils
@@ -3673,6 +3678,7 @@ def lua_decode(tokens):
         from transformers import GPT2Tokenizer
         global tokenizer
         tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
     return utils.decodenewlines(tokenizer.decode(tokens))
 
 #==================================================================#
@@ -3685,6 +3691,7 @@ def lua_encode(string):
         from transformers import GPT2Tokenizer
         global tokenizer
         tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
     return tokenizer.encode(utils.encodenewlines(string), max_length=int(4e9), truncation=True)
 
 #==================================================================#
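lua_decode and lua_encode share a lazily initialised, module-level GPT-2 tokenizer. A condensed sketch of that pattern, assuming a plain `if tokenizer is None` guard and a stand-in newline helper in place of utils.encodenewlines:

```python
# Hedged sketch of the lazy global-tokenizer pattern in lua_encode. The guard
# condition and the newline helper are assumptions, not the project's code.
from transformers import GPT2Tokenizer

tokenizer = None

def encodenewlines(text):
    # Stand-in for utils.encodenewlines so the sketch is self-contained.
    return text

def lua_encode_sketch(string):
    global tokenizer
    if tokenizer is None:
        tokenizer = GPT2Tokenizer.from_pretrained("gpt2", cache_dir="cache")
    # max_length/truncation mirror the line shown in the diff above.
    return tokenizer.encode(encodenewlines(string), max_length=int(4e9), truncation=True)
```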
@@ -4843,18 +4850,24 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
             if(os.path.isdir(tokenizer_id)):
                 try:
                     tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
+                    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
                 except:
                     tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
+                    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
             elif(os.path.isdir("models/{}".format(tokenizer_id.replace('/', '_')))):
                 try:
                     tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache")
+                    tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache")
                 except:
                     tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache", use_fast=False)
+                    tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache", use_fast=False)
             else:
                 try:
                     tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
+                    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
                 except:
                     tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
+                    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
         except:
             logger.warning(f"Unknown tokenizer {repr(tokenizer_id)}")
         koboldai_vars.api_tokenizer_id = tokenizer_id
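This hunk duplicates each AutoTokenizer call in the API tokenizer lookup. The lookup order shown in the context lines is: a local directory named after tokenizer_id, then a models/<id> directory, then the Hugging Face Hub, retrying with use_fast=False when the fast tokenizer cannot be built. A condensed sketch under those assumptions (paths and the revision value are placeholders):

```python
# Hedged sketch of the resolution order visible in the hunk above; it folds the
# three branches into one source choice plus the fast/slow retry.
import os
from transformers import AutoTokenizer

def resolve_api_tokenizer(tokenizer_id, revision=None, cache_dir="cache"):
    if os.path.isdir(tokenizer_id):
        source = tokenizer_id
    elif os.path.isdir("models/{}".format(tokenizer_id.replace("/", "_"))):
        source = "models/{}".format(tokenizer_id.replace("/", "_"))
    else:
        source = tokenizer_id  # fall through to the Hugging Face Hub
    try:
        return AutoTokenizer.from_pretrained(source, revision=revision, cache_dir=cache_dir)
    except Exception:
        # Some tokenizers only ship a slow (pure-Python) implementation.
        return AutoTokenizer.from_pretrained(source, revision=revision, cache_dir=cache_dir, use_fast=False)
```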
@@ -5230,6 +5243,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         from transformers import GPT2Tokenizer
         global tokenizer
         tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
 
     lnheader = len(tokenizer._koboldai_header)
 
@@ -66,7 +66,7 @@
 "#@title <b><-- Select your model below and then click this to start KoboldAI</b>\n",
 "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
 "\n",
-"Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Pygmalion 6B\", \"Pygmalion 6B Dev\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
+"Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Pygmalion 6B\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
 "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
 "Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
 "use_google_drive = True #@param {type:\"boolean\"}\n",
 utils.py | 14 ++++++++------
@@ -286,7 +286,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
         if token is None:
             raise EnvironmentError("You specified use_auth_token=True, but a huggingface token was not found.")
     _cache_dir = str(cache_dir) if cache_dir is not None else transformers.TRANSFORMERS_CACHE
-    _revision = revision if revision is not None else huggingface_hub.constants.DEFAULT_REVISION
+    _revision = args.revision if args.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
     sharded = False
     headers = {"user-agent": transformers.file_utils.http_user_agent(user_agent)}
     if use_auth_token:
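The change above switches the download hook from its own revision parameter to the command-line args.revision, with the Hub default branch as fallback. A minimal sketch of that defaulting rule, using a stand-in namespace in place of the project's parsed CLI args:

```python
# Hedged sketch of the revision-defaulting line in this hunk. `args` is a
# stand-in for the project's argparse namespace, not the real global.
from types import SimpleNamespace
import huggingface_hub

args = SimpleNamespace(revision=None)  # e.g. set via --revision on the command line

def pick_revision():
    # args.revision wins when given; otherwise use the Hub default (usually "main").
    return args.revision if args.revision is not None else huggingface_hub.constants.DEFAULT_REVISION

print(pick_revision())
```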
@@ -306,7 +306,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
             filename = transformers.modeling_utils.WEIGHTS_INDEX_NAME if sharded else transformers.modeling_utils.WEIGHTS_NAME
         except AttributeError:
             return
-        url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=revision)
+        url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=_revision)
         if is_cached(filename) or requests.head(url, allow_redirects=True, proxies=proxies, headers=headers):
             break
     if sharded:
@@ -320,7 +320,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
         with open(map_filename) as f:
             map_data = json.load(f)
         filenames = set(map_data["weight_map"].values())
-        urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=revision) for n in filenames]
+        urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=_revision) for n in filenames]
         if not force_download:
             urls = [u for u, n in zip(urls, filenames) if not is_cached(n)]
             if not urls:
@@ -485,6 +485,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
     import transformers
     import transformers.modeling_utils
     from huggingface_hub import HfFolder
+    _revision = args.revision if args.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
     if shutil.which("aria2c") is None:  # Don't do anything if aria2 is not installed
         return
     if local_files_only:  # If local_files_only is true, we obviously don't need to download anything
@@ -519,7 +520,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
             filename = transformers.modeling_utils.WEIGHTS_INDEX_NAME if sharded else transformers.modeling_utils.WEIGHTS_NAME
         except AttributeError:
             return
-        url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=revision)
+        url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=_revision)
         if is_cached(url) or requests.head(url, allow_redirects=True, proxies=proxies, headers=headers):
             break
     if sharded:
@@ -533,7 +534,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
         with open(map_filename) as f:
             map_data = json.load(f)
         filenames = set(map_data["weight_map"].values())
-        urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=revision) for n in filenames]
+        urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=_revision) for n in filenames]
         if not force_download:
             urls = [u for u in urls if not is_cached(u)]
             if not urls:
@@ -580,7 +581,8 @@ def get_num_shards(filename):
 def get_sharded_checkpoint_num_tensors(pretrained_model_name_or_path, filename, cache_dir=None, force_download=False, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, **kwargs):
     import transformers.modeling_utils
     import torch
-    shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=revision)
+    _revision = args.revision if args.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
+    shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=_revision)
     return list(itertools.chain(*(torch.load(p, map_location="cpu").keys() for p in shard_paths)))
 
 #==================================================================#
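For context on the line being changed here: get_sharded_checkpoint_num_tensors collects the tensor names across every shard of a sharded checkpoint. A sketch of that collection step, assuming shard_paths is the list of local shard files returned by transformers.modeling_utils.get_checkpoint_shard_files:

```python
# Hedged sketch of the final line of the hunk above: each shard is a state-dict
# file, so loading it on CPU and chaining the key views yields every tensor
# name in the checkpoint.
import itertools
import torch

def tensor_names_from_shards(shard_paths):
    return list(itertools.chain(*(torch.load(p, map_location="cpu").keys() for p in shard_paths)))
```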