Change the command line flags for breakmodel
parent a1e4405aa6
commit 91352ea9f1

aiserver.py | 21 ++++++++++++++++++---
@@ -204,7 +204,15 @@ def device_config(model):
     n_layers = model.config.num_layers
     model.half().to('cpu')
     gc.collect()
-    if(args.breakmodel_layers is not None):
+    if(args.layers is not None):
+        try:
+            breakmodel.gpu_blocks = list(map(int, args.layers.split(',')))
+            assert len(breakmodel.gpu_blocks) <= torch.cuda.device_count()
+            assert sum(breakmodel.gpu_blocks) <= n_layers
+        except:
+            print("WARNING: --layers is malformatted. Please use the --help option to see correct usage of --layers. Defaulting to all layers on device 0.", file=sys.stderr)
+            breakmodel.gpu_blocks = [n_layers]
+    elif(args.breakmodel_layers is not None):
         breakmodel.gpu_blocks = [n_layers - max(0, min(n_layers, args.breakmodel_layers))]
     else:
         device_count = torch.cuda.device_count()
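For reference, here is a minimal standalone sketch of the validation the new branch performs on --layers. The parse_layer_split helper name and the example device/layer counts are illustrative, not part of the commit:

# Hypothetical sketch of the --layers validation done in device_config.
import sys

def parse_layer_split(value, device_count, n_layers):
    try:
        gpu_blocks = list(map(int, value.split(',')))
        assert len(gpu_blocks) <= device_count  # at most one entry per GPU device
        assert sum(gpu_blocks) <= n_layers      # cannot assign more layers than the model has
        return gpu_blocks
    except (ValueError, AssertionError):
        print("WARNING: --layers is malformatted. Defaulting to all layers on device 0.", file=sys.stderr)
        return [n_layers]

print(parse_layer_split("8,9,11", device_count=3, n_layers=32))  # [8, 9, 11]
print(parse_layer_split("8,9,11", device_count=2, n_layers=32))  # [32]: more entries than devices

Any layers left unassigned by the list stay in system RAM, which is what makes the generation hybrid.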
@@ -270,8 +278,9 @@ parser.add_argument("--remote", action='store_true', help="Optimizes KoboldAI fo
 parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
 parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
 parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.")
-parser.add_argument("--breakmodel", action='store_true', help="For models that support GPU-CPU hybrid generation, use this feature instead of GPU or CPU generation")
-parser.add_argument("--breakmodel_layers", type=int, help="Specify the number of layers to commit to system RAM if --breakmodel is used")
+parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS)
+parser.add_argument("--breakmodel_layers", type=int, help=argparse.SUPPRESS)
+parser.add_argument("--layers", type=str, help="If using a model that supports hybrid generation, this is a comma-separated list that specifies how many layers to put on each GPU device. For example to put 8 layers on device 0, 9 layers on device 1 and 11 layers on device 2, use --layers 8,9,11")
 parser.add_argument("--override_delete", action='store_true', help="Deleting stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow deleting stories if using --remote and prevent deleting stories otherwise.")
 parser.add_argument("--override_rename", action='store_true', help="Renaming stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow renaming stories if using --remote and prevent renaming stories otherwise.")
 parser.add_argument("--configname", help="Force a fixed configuration name to aid with config management.")
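The two deprecated flags remain accepted, so existing launch scripts keep working, but argparse.SUPPRESS hides them from the help text. A small sketch of that pattern, with an illustrative parser rather than the real one:

# help=argparse.SUPPRESS keeps an option functional but omits it from --help.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--layers", type=str,
                    help="Comma-separated list of how many layers to put on each GPU device")
parser.add_argument("--breakmodel_layers", type=int, help=argparse.SUPPRESS)

args = parser.parse_args(["--breakmodel_layers", "24"])
print(args.breakmodel_layers)  # 24, even though --help never lists the flag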
@@ -304,6 +313,12 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly"]):
     print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="")
     vars.hascuda = torch.cuda.is_available()
     vars.bmsupported = vars.model in ("EleutherAI/gpt-neo-1.3B", "EleutherAI/gpt-neo-2.7B", "NeoCustom")
+    if(args.breakmodel is not None and args.breakmodel):
+        print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --layers is used (see --help for details).", file=sys.stderr)
+    if(args.breakmodel_layers is not None):
+        print("WARNING: --breakmodel_layers is deprecated. Use --layers instead (see --help for details).", file=sys.stderr)
+    if(not vars.bmsupported and (args.layers is not None or args.breakmodel_layers is not None)):
+        print("WARNING: This model does not support hybrid generation. --layers will be ignored.", file=sys.stderr)
     if(vars.hascuda):
         print("{0}FOUND!{1}".format(colors.GREEN, colors.END))
     else:
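To see how these startup checks behave, here is a condensed, runnable approximation; the SimpleNamespace stands in for the real parsed arguments and vars.bmsupported:

# Stand-ins for the real parsed arguments and vars.bmsupported.
import sys
from types import SimpleNamespace

args = SimpleNamespace(breakmodel=True, breakmodel_layers=24, layers=None)
bmsupported = False

if(args.breakmodel is not None and args.breakmodel):
    print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --layers is used (see --help for details).", file=sys.stderr)
if(args.breakmodel_layers is not None):
    print("WARNING: --breakmodel_layers is deprecated. Use --layers instead (see --help for details).", file=sys.stderr)
if(not bmsupported and (args.layers is not None or args.breakmodel_layers is not None)):
    print("WARNING: This model does not support hybrid generation. --layers will be ignored.", file=sys.stderr)

With these stand-in values all three warnings fire; with a supported model and --layers alone, none do.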