mirror of
				https://github.com/KoboldAI/KoboldAI-Client.git
				synced 2025-06-05 21:59:24 +02:00 
			
		
		
		
	Don't import breakmodel until it's actually needed
breakmodel imports torch, which takes a long time to import. We should delay importing torch for as long as possible.
This commit is contained in:
		| @@ -26,7 +26,6 @@ import gensettings | |||||||
| from utils import debounce | from utils import debounce | ||||||
| import utils | import utils | ||||||
| import structures | import structures | ||||||
| import breakmodel |  | ||||||
|  |  | ||||||
| #==================================================================# | #==================================================================# | ||||||
| # Variables & Storage | # Variables & Storage | ||||||
| @@ -392,6 +391,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly"]): | |||||||
|                 if(vars.usegpu): |                 if(vars.usegpu): | ||||||
|                     generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=0) |                     generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=0) | ||||||
|                 elif(vars.breakmodel):  # Use both RAM and VRAM (breakmodel) |                 elif(vars.breakmodel):  # Use both RAM and VRAM (breakmodel) | ||||||
|  |                     import breakmodel | ||||||
|                     n_layers = model.config.num_layers |                     n_layers = model.config.num_layers | ||||||
|                     breakmodel.total_blocks = n_layers |                     breakmodel.total_blocks = n_layers | ||||||
|                     model.half().to('cpu') |                     model.half().to('cpu') | ||||||
| @@ -442,6 +442,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly"]): | |||||||
|                 if(vars.usegpu): |                 if(vars.usegpu): | ||||||
|                     generator = pipeline('text-generation', model=vars.model, device=0) |                     generator = pipeline('text-generation', model=vars.model, device=0) | ||||||
|                 elif(vars.breakmodel):  # Use both RAM and VRAM (breakmodel) |                 elif(vars.breakmodel):  # Use both RAM and VRAM (breakmodel) | ||||||
|  |                     import breakmodel | ||||||
|                     model = AutoModel.from_pretrained(vars.model) |                     model = AutoModel.from_pretrained(vars.model) | ||||||
|                     n_layers = model.config.num_layers |                     n_layers = model.config.num_layers | ||||||
|                     breakmodel.total_blocks = n_layers |                     breakmodel.total_blocks = n_layers | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Gnome Ann
					Gnome Ann