Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-02-25 16:08:00 +01:00)
Threading + Memory Sizes
Polish effort to suppress a warning and list more accurate VRAM requirements, as tested with the full 2048 max tokens.
This commit is contained in:
parent fca7f8659f
commit 5b5a479f29
aiserver.py (13 changes)
@@ -11,6 +11,7 @@ eventlet.monkey_patch(all=True, thread=False)
 import os
 os.system("")
 os.environ['EVENTLET_THREADPOOL_SIZE'] = '1'
+os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 from eventlet import tpool
 
 from os import path, getcwd
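The new environment variable is presumably aimed at the HuggingFace tokenizers warning about parallelism being used around a process fork; setting it before the tokenizers library gets imported is the usual way to silence it. A minimal standalone sketch of the idea (not part of aiserver.py; the gpt2 tokenizer is only an example):

import os

# Must be set before transformers/tokenizers is imported so the library
# picks it up and skips its parallelism warning.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
print(tokenizer.encode('Hello KoboldAI'))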
@@ -74,12 +75,12 @@ modellist = [
     ["Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "8GB"],
     ["Horni-LN 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB"],
     ["Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", "8GB"],
-    ["GPT-J 6B", "EleutherAI/gpt-j-6B", "12GB"],
+    ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB"],
     ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB"],
     ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB"],
-    ["GPT-2 XL", "gpt2-xl", "8GB"],
-    ["GPT-2 Large", "gpt2-large", "6GB"],
-    ["GPT-2 Med", "gpt2-medium", "4GB"],
+    ["GPT-2 XL", "gpt2-xl", "6GB"],
+    ["GPT-2 Large", "gpt2-large", "4GB"],
+    ["GPT-2 Med", "gpt2-medium", "2GB"],
     ["GPT-2", "gpt2", "2GB"],
     ["OpenAI API (requires API key)", "OAI", ""],
     ["InferKit API (requires API key)", "InferKit", ""],
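The revised VRAM column reflects testing at the full 2048-token context rather than a short prompt. A rough, hypothetical way to reproduce that kind of measurement for a single entry (standalone script, assuming PyTorch with CUDA and half precision, which aiserver.py may not match exactly):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "EleutherAI/gpt-neo-1.3B"  # any Hugging Face id from modellist
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).half().to("cuda")

# Fill the context close to the 2048-token maximum, then generate the rest
# and read back the peak GPU allocation.
ids = torch.randint(0, tokenizer.vocab_size, (1, 2000), device="cuda")
torch.cuda.reset_peak_memory_stats()
with torch.no_grad():
    model.generate(ids, max_length=2048, do_sample=True)
print("peak VRAM: %.1f GiB" % (torch.cuda.max_memory_allocated() / 1024**3))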
@@ -201,7 +202,7 @@ class vars:
 # Function to get model selection at startup
 #==================================================================#
 def getModelSelection():
-    print(" # Model V/RAM\n =========================================")
+    print(" # Model VRAM\n =========================================")
     i = 1
     for m in modellist:
         print(" {0} - {1}\t\t{2}".format("{:<2}".format(i), m[0].ljust(15), m[2]))
@@ -432,7 +433,7 @@ if args.model:
         vars.colaburl = args.path + "/request"; # Lets just use the same parameter to keep it simple
 
 else:
-    print("{0}Welcome to the KoboldAI Server!\nSelect an AI model to continue:{1}\n".format(colors.CYAN, colors.END))
+    print("{0}Welcome to the KoboldAI Server!\nListed RAM is the optimal VRAM and CPU ram can be up to twice the amount.\nMost models can run at less VRAM with reduced max tokens or less layers on the GPU.\nSelect an AI model to continue:{1}\n".format(colors.CYAN, colors.END))
     getModelSelection()
 
 # If transformers model was selected & GPU available, ask to use CPU or GPU