mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Don't import breakmodel until it's actually needed
breakmodel imports torch, which takes a long time to import, so we should delay importing torch for as long as possible.
This commit is contained in:
@ -26,7 +26,6 @@ import gensettings
|
|||||||
from utils import debounce
|
from utils import debounce
|
||||||
import utils
|
import utils
|
||||||
import structures
|
import structures
|
||||||
import breakmodel
|
|
||||||
|
|
||||||
#==================================================================#
|
#==================================================================#
|
||||||
# Variables & Storage
|
# Variables & Storage
|
||||||
@ -392,6 +391,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly"]):
|
|||||||
if(vars.usegpu):
|
if(vars.usegpu):
|
||||||
generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=0)
|
generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=0)
|
||||||
elif(vars.breakmodel): # Use both RAM and VRAM (breakmodel)
|
elif(vars.breakmodel): # Use both RAM and VRAM (breakmodel)
|
||||||
|
import breakmodel
|
||||||
n_layers = model.config.num_layers
|
n_layers = model.config.num_layers
|
||||||
breakmodel.total_blocks = n_layers
|
breakmodel.total_blocks = n_layers
|
||||||
model.half().to('cpu')
|
model.half().to('cpu')
|
||||||
@ -442,6 +442,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly"]):
|
|||||||
if(vars.usegpu):
|
if(vars.usegpu):
|
||||||
generator = pipeline('text-generation', model=vars.model, device=0)
|
generator = pipeline('text-generation', model=vars.model, device=0)
|
||||||
elif(vars.breakmodel): # Use both RAM and VRAM (breakmodel)
|
elif(vars.breakmodel): # Use both RAM and VRAM (breakmodel)
|
||||||
|
import breakmodel
|
||||||
model = AutoModel.from_pretrained(vars.model)
|
model = AutoModel.from_pretrained(vars.model)
|
||||||
n_layers = model.config.num_layers
|
n_layers = model.config.num_layers
|
||||||
breakmodel.total_blocks = n_layers
|
breakmodel.total_blocks = n_layers
|
||||||
|
Reference in New Issue
Block a user