Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-02-10 00:30:49 +01:00
Merge pull request #141 from ebolam/Web-UI
Functional --model/--path, fix for switching models
This commit is contained in:
commit ae2ee0dd57

aiserver.py (658 changed lines)
@@ -45,6 +45,7 @@ import sys
import gc

import lupa
import importlib

# KoboldAI
import fileops
@@ -53,13 +54,21 @@ from utils import debounce
import utils
import structures
import torch
-from transformers import StoppingCriteria, GPT2TokenizerFast, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoTokenizer
+from transformers import StoppingCriteria, GPT2TokenizerFast, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, modeling_utils
from transformers import __version__ as transformers_version
import transformers
try:
    from transformers.models.opt.modeling_opt import OPTDecoder
except:
    pass
import transformers.generation_utils
global tpu_mtj_backend


if lupa.LUA_VERSION[:2] != (5, 4):
    print(f"Please install lupa==1.10. You have lupa {lupa.__version__}.", file=sys.stderr)

patch_causallm_patched = False

# Make sure tqdm progress bars display properly in Colab
from tqdm.auto import tqdm
@@ -255,7 +264,8 @@ class vars:
last_userscripts = []  # List of previous userscript filenames from the previous time userscripts were send via usstatitems
corescript = "default.lua"  # Filename of corescript to load
# badwords = []  # Array of str/chr values that should be removed from output
badwordsids = [[13460], [6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
badwordsids = []
badwordsids_default = [[13460], [6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], [14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], 
[11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]]
badwordsids_opt = [[44717], [46613], [48513], [49923], [50185], [48755], [8488], [43303], [49659], [48601], [49817], [45405], [48742], [49925], [47720], [11227], [48937], [48784], [50017], [42248], [49310], [48082], [49895], [50025], [49092], [49007], [8061], [44226], [0], [742], [28578], [15698], [49784], [46679], [39365], [49281], [49609], [48081], [48906], [46161], [48554], [49670], [48677], [49721], [49632], [48610], [48462], [47457], [10975], [46077], [28696], [48709], [43839], [49798], [49154], [48203], [49625], [48395], [50155], [47161], [49095], [48833], [49420], [49666], [48443], [22176], [49242], [48651], [49138], [49750], [40389], [48021], [21838], [49070], [45333], [40862], [1], [49915], [33525], [49858], [50254], [44403], [48992], [48872], [46117], [49853], [47567], [50206], [41552], [50068], [48999], [49703], [49940], [49329], [47620], [49868], [49962], [2], [44082], [50236], [31274], [50260], [47052], [42645], [49177], [17523], [48691], [49900], [49069], [49358], [48794], [47529], [46479], [48457], [646], [49910], [48077], [48935], [46386], [48902], [49151], [48759], [49803], [45587], [48392], [47789], [48654], [49836], [49230], [48188], [50264], [46844], [44690], [48505], [50161], [27779], [49995], [41833], [50154], [49097], [48520], [50018], [8174], [50084], [49366], [49526], [50193], [7479], [49982], [3]]
fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format
@@ -349,14 +359,40 @@ print("{0}OK!{1}".format(colors.GREEN, colors.END))
#==================================================================#
# Function to get model selection at startup
#==================================================================#
-def sendModelSelection(menu="mainmenu"):
+def sendModelSelection(menu="mainmenu", folder="./models"):
    #If we send one of the manual load options, send back the list of model directories, otherwise send the menu
    if menu in ('NeoCustom', 'GPT2Custom'):
-        menu_list = [[folder, menu, "", False] for folder in next(os.walk('./models'))[1]]
+        (paths, breadcrumbs) = get_folder_path_info(folder)
+        menu_list = [[folder, menu, "", False] for folder in paths]
        menu_list.append(["Return to Main Menu", "mainmenu", "", True])
-        emit('from_server', {'cmd': 'show_model_menu', 'data': menu_list, 'menu': 'custom'}, broadcast=True)
+        emit('from_server', {'cmd': 'show_model_menu', 'data': menu_list, 'menu': menu, 'breadcrumbs': breadcrumbs}, broadcast=True)
    else:
-        emit('from_server', {'cmd': 'show_model_menu', 'data': model_menu[menu], 'menu': menu}, broadcast=True)
+        emit('from_server', {'cmd': 'show_model_menu', 'data': model_menu[menu], 'menu': menu, 'breadcrumbs': []}, broadcast=True)

def get_folder_path_info(base):
    if base == 'This PC':
        breadcrumbs = [['This PC', 'This PC']]
        paths = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))]
    else:
        path = os.path.abspath(base)
        if path[-1] == "\\":
            path = path[:-1]
        breadcrumbs = []
        for i in range(len(path.split("\\"))):
            breadcrumbs.append(["\\".join(path.split("\\")[:i+1]),
                                path.split("\\")[i]])
        if len(breadcrumbs) == 1:
            breadcrumbs = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))]
        else:
            if len([["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))]) > 0:
                breadcrumbs.insert(0, ['This PC', 'This PC'])
        paths = []
        base_path = os.path.abspath(base)
        for item in os.listdir(base_path):
            if os.path.isdir(os.path.join(base_path, item)):
                paths.append([os.path.join(base_path, item), item])
    # Paths/breadcrumbs is a list of lists, where the first element in the sublist is the full path and the second is the folder name
    return (paths, breadcrumbs)

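For orientation, a minimal sketch of the data shapes the new menu code passes around (hypothetical folder names, not part of this commit): get_folder_path_info() returns (paths, breadcrumbs) as lists of [full_path, display_name] pairs, and sendModelSelection() wraps each path entry into a row of the form [entry, action, vram_label, is_submenu].

    # Illustrative values only; the folder names are made up.
    paths = [["C:\\KoboldAI\\models\\gpt-neo-2.7B", "gpt-neo-2.7B"],
             ["C:\\KoboldAI\\models\\my-finetune", "my-finetune"]]
    breadcrumbs = [["This PC", "This PC"], ["C:", "C:"],
                   ["C:\\KoboldAI", "KoboldAI"], ["C:\\KoboldAI\\models", "models"]]

    menu_list = [[p, "NeoCustom", "", False] for p in paths]
    menu_list.append(["Return to Main Menu", "mainmenu", "", True])
    # Emitted to the browser as:
    # {'cmd': 'show_model_menu', 'data': menu_list, 'menu': 'NeoCustom', 'breadcrumbs': breadcrumbs}
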
def getModelSelection(modellist):
    print(" # Model\t\t\t\t\t\tVRAM\n ========================================================")
@@ -395,6 +431,15 @@ def getModelSelection(modellist):
        print("{0}Select an AI model to continue:{1}\n".format(colors.CYAN, colors.END))
        getModelSelection(mainmenu)

def check_if_dir_is_model(path):
    try:
        from transformers import AutoConfig
        model_config = AutoConfig.from_pretrained(path, revision=vars.revision, cache_dir="cache")
    except:
        return False
    return True


#==================================================================#
# Return all keys in tokenizer dictionary containing char
#==================================================================#
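For context, a rough sketch of the probe that check_if_dir_is_model() performs (assumed paths, and without the vars.revision plumbing): a directory counts as a model if transformers can load a config from it.

    from transformers import AutoConfig

    def is_model_dir(path):
        # Same idea as check_if_dir_is_model() above: AutoConfig raises if
        # the directory has no readable config.json, and the error is swallowed.
        try:
            AutoConfig.from_pretrained(path, cache_dir="cache")
            return True
        except Exception:
            return False

    # is_model_dir("./models/gpt-neo-2.7B")  -> True   (hypothetical layout)
    # is_model_dir("./models")               -> False  (just a folder of folders)
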
@@ -986,6 +1031,7 @@ def get_model_info(model, directory=""):
    key_value = ""
    break_values = []
    url = False
+    gpu_count = torch.cuda.device_count()
    if model in [x[1] for x in model_menu['apilist']]:
        if path.exists("settings/{}.settings".format(model)):
            with open("settings/{}.settings".format(model), "r") as file:
@@ -1014,10 +1060,10 @@ def get_model_info(model, directory=""):
            break_values = file.read().split(",")
        else:
            break_values = [layer_count]
-        break_values += [0] * (gpu+1 - len(break_values))
+        break_values += [0] * (gpu_count - len(break_values))
    emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key,
                         'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
-                         'break_values': break_values, 'gpu_count': torch.cuda.device_count(),
+                         'break_values': break_values, 'gpu_count': gpu_count,
                         'url': url}, broadcast=True)
    if key_value != "":
        get_oai_models(key_value)
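A small worked example of the break_values padding fixed above (made-up numbers): padding to gpu_count, the value returned by torch.cuda.device_count(), yields one layer-split entry per detected GPU rather than a length derived from the gpu flag.

    # Hypothetical: a saved two-GPU split on a machine that actually has 3 GPUs.
    gpu_count = 3                  # torch.cuda.device_count()
    break_values = [10, 10]        # e.g. read back from the model's saved settings
    break_values += [0] * (gpu_count - len(break_values))
    assert break_values == [10, 10, 0]
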
@@ -1030,12 +1076,12 @@ def get_layer_count(model, directory=""):
        # Get the model_type from the config or assume a model type if it isn't present
        else:
            from transformers import AutoConfig
-            if vars.custmodpth == "":
+            if directory == "":
                model_config = AutoConfig.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
            elif(os.path.isdir(vars.custmodpth.replace('/', '_'))):
                model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), revision=vars.revision, cache_dir="cache")
            elif(os.path.isdir("models/{}".format(vars.custmodpth.replace('/', '_')))):
                model_config = AutoConfig.from_pretrained("models/{}".format(vars.custmodpth.replace('/', '_')), revision=vars.revision, cache_dir="cache")
            elif(os.path.isdir(directory)):
                model_config = AutoConfig.from_pretrained(directory, revision=vars.revision, cache_dir="cache")
            else:
                model_config = AutoConfig.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")

@@ -1100,193 +1146,8 @@ def get_oai_models(key):
        emit('from_server', {'cmd': 'errmsg', 'data': req.json()})


def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model=""):
|
||||
global model
|
||||
global generator
|
||||
global torch
|
||||
global model_config
|
||||
vars.noai = False
|
||||
if not initial_load:
|
||||
set_aibusy(True)
|
||||
if vars.model != 'ReadOnly':
|
||||
emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(vars.model)}, broadcast=True)
|
||||
#Have to add a sleep so the server will send the emit for some reason
|
||||
time.sleep(0.1)
|
||||
if gpu_layers is not None:
|
||||
args.breakmodel_gpulayers = gpu_layers
|
||||
|
||||
#We need to wipe out the existing model and refresh the cuda cache
|
||||
model = None
|
||||
generator = None
|
||||
try:
|
||||
torch.cuda.empty_cache()
|
||||
except:
|
||||
pass
|
||||
|
||||
#Let's set the GooseAI or OpenAI server URLs if that's applicable
|
||||
if online_model != "":
|
||||
if path.exists("settings/{}.settings".format(vars.model)):
|
||||
changed=False
|
||||
with open("settings/{}.settings".format(vars.model), "r") as file:
|
||||
# Check if API key exists
|
||||
js = json.load(file)
|
||||
if 'online_model' in js:
|
||||
if js['online_model'] != online_model:
|
||||
changed=True
|
||||
js['online_model'] = online_model
|
||||
else:
|
||||
changed=True
|
||||
js['online_model'] = online_model
|
||||
if changed:
|
||||
with open("settings/{}.settings".format(vars.model), "w") as file:
|
||||
file.write(json.dumps(js, indent=3))
|
||||
# Swap OAI Server if GooseAI was selected
|
||||
if(vars.model == "GooseAI"):
|
||||
vars.oaiengines = "https://api.goose.ai/v1/engines"
|
||||
vars.model = "OAI"
|
||||
args.configname = "GooseAI" + "/" + online_model
|
||||
else:
|
||||
args.configname = vars.model + "/" + online_model
|
||||
vars.oaiurl = vars.oaiengines + "/{0}/completions".format(online_model)
|
||||
|
||||
|
||||
# If transformers model was selected & GPU available, ask to use CPU or GPU
|
||||
if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||
vars.allowsp = True
|
||||
# Test for GPU support
|
||||
|
||||
# Make model path the same as the model name to make this consistent with the other loading method if it isn't a known model type
|
||||
# This code is not just a workaround for below, it is also used to make the behavior consistent with other loading methods - Henk717
|
||||
if(not vars.model in ["NeoCustom", "GPT2Custom"]):
|
||||
vars.custmodpth = vars.model
|
||||
elif(vars.model == "NeoCustom"):
|
||||
vars.model = os.path.basename(os.path.normpath(vars.custmodpth))
|
||||
|
||||
# Get the model_type from the config or assume a model type if it isn't present
|
||||
from transformers import AutoConfig
|
||||
if(os.path.isdir(vars.custmodpth.replace('/', '_'))):
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), revision=vars.revision, cache_dir="cache")
|
||||
vars.model_type = model_config.model_type
|
||||
except ValueError as e:
|
||||
vars.model_type = "not_found"
|
||||
elif(os.path.isdir("models/{}".format(vars.custmodpth.replace('/', '_')))):
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained("models/{}".format(vars.custmodpth.replace('/', '_')), revision=vars.revision, cache_dir="cache")
|
||||
vars.model_type = model_config.model_type
|
||||
except ValueError as e:
|
||||
vars.model_type = "not_found"
|
||||
else:
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
vars.model_type = model_config.model_type
|
||||
except ValueError as e:
|
||||
vars.model_type = "not_found"
|
||||
if(vars.model_type == "not_found" and vars.model == "NeoCustom"):
|
||||
vars.model_type = "gpt_neo"
|
||||
elif(vars.model_type == "not_found" and vars.model == "GPT2Custom"):
|
||||
vars.model_type = "gpt2"
|
||||
elif(vars.model_type == "not_found"):
|
||||
print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
|
||||
vars.model_type = "gpt_neo"
|
||||
|
||||
if(vars.model_type == "opt"):
|
||||
vars.badwordsids = vars.badwordsids_opt
|
||||
|
||||
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||
loadmodelsettings()
|
||||
loadsettings()
|
||||
print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="")
|
||||
vars.hascuda = torch.cuda.is_available()
|
||||
vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm", "opt") and not vars.nobreakmodel
|
||||
if(args.breakmodel is not None and args.breakmodel):
|
||||
print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).", file=sys.stderr)
|
||||
if(args.breakmodel_layers is not None):
|
||||
print("WARNING: --breakmodel_layers is deprecated. Use --breakmodel_gpulayers instead (see --help for details).", file=sys.stderr)
|
||||
if(args.model and vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers):
|
||||
print("WARNING: Model launched without the --breakmodel_gpulayers argument, defaulting to GPU only mode.", file=sys.stderr)
|
||||
vars.bmsupported = False
|
||||
if(not vars.bmsupported and (args.breakmodel_gpulayers is not None or args.breakmodel_layers is not None)):
|
||||
print("WARNING: This model does not support hybrid generation. --breakmodel_gpulayers will be ignored.", file=sys.stderr)
|
||||
if(vars.hascuda):
|
||||
print("{0}FOUND!{1}".format(colors.GREEN, colors.END))
|
||||
else:
|
||||
print("{0}NOT FOUND!{1}".format(colors.YELLOW, colors.END))
|
||||
|
||||
if args.model:
|
||||
if(vars.hascuda):
|
||||
genselected = True
|
||||
vars.usegpu = True
|
||||
vars.breakmodel = False
|
||||
if(vars.bmsupported):
|
||||
vars.usegpu = False
|
||||
vars.breakmodel = True
|
||||
if(args.cpu):
|
||||
vars.usegpu = False
|
||||
vars.breakmodel = False
|
||||
elif(vars.hascuda):
|
||||
if(vars.bmsupported):
|
||||
genselected = True
|
||||
vars.usegpu = False
|
||||
vars.breakmodel = True
|
||||
else:
|
||||
genselected = False
|
||||
else:
|
||||
genselected = False
|
||||
|
||||
if(vars.hascuda):
|
||||
if(use_gpu):
|
||||
if(vars.bmsupported):
|
||||
vars.breakmodel = True
|
||||
vars.usegpu = False
|
||||
genselected = True
|
||||
else:
|
||||
vars.breakmodel = False
|
||||
vars.usegpu = True
|
||||
genselected = True
|
||||
else:
|
||||
vars.breakmodel = False
|
||||
vars.usegpu = False
|
||||
genselected = True
|
||||
|
||||
# Ask for API key if InferKit was selected
|
||||
if(vars.model == "InferKit"):
|
||||
vars.apikey = vars.oaiapikey
|
||||
|
||||
# Swap OAI Server if GooseAI was selected
|
||||
if(vars.model == "GooseAI"):
|
||||
vars.oaiengines = "https://api.goose.ai/v1/engines"
|
||||
vars.model = "OAI"
|
||||
args.configname = "GooseAI"
|
||||
|
||||
# Ask for API key if OpenAI was selected
|
||||
if(vars.model == "OAI"):
|
||||
if not args.configname:
|
||||
args.configname = "OAI"
|
||||
|
||||
if(vars.model == "ReadOnly"):
|
||||
vars.noai = True
|
||||
|
||||
# Start transformers and create pipeline
|
||||
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||
if(not vars.noai):
|
||||
print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
|
||||
from transformers import StoppingCriteria, GPT2TokenizerFast, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoTokenizer
|
||||
for m in ("GPTJModel", "XGLMModel"):
|
||||
try:
|
||||
globals()[m] = getattr(__import__("transformers"), m)
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
from transformers.models.opt.modeling_opt import OPTDecoder
|
||||
except:
|
||||
pass
|
||||
import transformers.generation_utils
|
||||
from transformers import __version__ as transformers_version
|
||||
|
||||
from transformers import PreTrainedModel
|
||||
from transformers import modeling_utils
|
||||
def patch_transformers():
|
||||
global transformers
|
||||
old_from_pretrained = PreTrainedModel.from_pretrained.__func__
|
||||
@classmethod
|
||||
def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
|
||||
@@ -1309,101 +1170,6 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
|
||||
return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
|
||||
modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
|
||||
|
||||
# Lazy loader
|
||||
import torch_lazy_loader
|
||||
def get_lazy_load_callback(n_layers, convert_to_float16=True):
|
||||
if not vars.lazy_load:
|
||||
return
|
||||
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
if "breakmodel" in globals():
|
||||
gpu_blocks = breakmodel.gpu_blocks
|
||||
ram_blocks = ram_blocks = n_layers - sum(gpu_blocks)
|
||||
cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
|
||||
else:
|
||||
ram_blocks = gpu_blocks = cumulative_gpu_blocks = None
|
||||
|
||||
def lazy_load_callback(model_dict, f, **_):
|
||||
if lazy_load_callback.nested:
|
||||
return
|
||||
lazy_load_callback.nested = True
|
||||
|
||||
device_map = {}
|
||||
|
||||
for _key, spec in lazy_load_spec.get("layer_weights", {}).items():
|
||||
for layer in range(n_layers):
|
||||
key = _key.format(layer=layer)
|
||||
if key not in model_dict:
|
||||
continue
|
||||
device = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu" if not vars.hascuda or not vars.breakmodel or layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
|
||||
device_map[key] = device
|
||||
|
||||
for key, value in model_dict.items():
|
||||
if isinstance(value, torch_lazy_loader.LazyTensor) and key not in device_map:
|
||||
device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu"
|
||||
|
||||
if utils.num_shards is None or utils.current_shard == 0:
|
||||
if utils.num_shards is not None:
|
||||
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
|
||||
else:
|
||||
num_tensors = len(device_map)
|
||||
print(flush=True)
|
||||
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors", file=Send_to_socketio())
|
||||
|
||||
with zipfile.ZipFile(f, "r") as z:
|
||||
try:
|
||||
last_storage_key = None
|
||||
f = None
|
||||
current_offset = 0
|
||||
if utils.num_shards is not None:
|
||||
utils.current_shard += 1
|
||||
for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
|
||||
storage_key = model_dict[key].key
|
||||
if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset:
|
||||
last_storage_key = storage_key
|
||||
if isinstance(f, zipfile.ZipExtFile):
|
||||
f.close()
|
||||
f = z.open(f"archive/data/{storage_key}")
|
||||
current_offset = 0
|
||||
if current_offset != model_dict[key].seek_offset:
|
||||
f.read(model_dict[key].seek_offset - current_offset)
|
||||
current_offset = model_dict[key].seek_offset
|
||||
device = device_map[key]
|
||||
size = functools.reduce(lambda x, y: x * y, model_dict[key].shape, 1)
|
||||
dtype = model_dict[key].dtype
|
||||
nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
|
||||
#print(f"Transferring <{key}> to {'(CPU)' if device == 'cpu' else '[device ' + str(device) + ']'} ... ", end="", flush=True)
|
||||
model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
|
||||
if model_dict[key].dtype is torch.float32:
|
||||
vars.fp32_model = True
|
||||
if convert_to_float16 and vars.hascuda and (vars.breakmodel or vars.usegpu) and model_dict[key].dtype is torch.float32:
|
||||
model_dict[key] = model_dict[key].to(torch.float16)
|
||||
if not vars.usegpu and not vars.breakmodel and model_dict[key].dtype is torch.float16:
|
||||
model_dict[key] = model_dict[key].to(torch.float32)
|
||||
model_dict[key] = model_dict[key].to(device)
|
||||
#print("OK", flush=True)
|
||||
current_offset += nbytes
|
||||
utils.bar.update(1)
|
||||
finally:
|
||||
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
|
||||
utils.bar.close()
|
||||
utils.bar = None
|
||||
lazy_load_callback.nested = False
|
||||
if isinstance(f, zipfile.ZipExtFile):
|
||||
f.close()
|
||||
|
||||
lazy_load_callback.nested = False
|
||||
return lazy_load_callback
|
||||
|
||||
lazy_load_config_path = os.path.join("maps", vars.model_type + ".json")
|
||||
if(vars.lazy_load and "model_config" in globals() and os.path.isfile(lazy_load_config_path)):
|
||||
with open(lazy_load_config_path) as f:
|
||||
lazy_load_spec = json.load(f)
|
||||
|
||||
else:
|
||||
vars.lazy_load = False
|
||||
|
||||
# Some versions of transformers 4.17.0.dev0 are affected by
|
||||
# https://github.com/huggingface/transformers/issues/15736
|
||||
# This is a workaround for those versions of transformers.
|
||||
@@ -1635,6 +1401,286 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
|
||||
return stopping_criteria
|
||||
transformers.generation_utils.GenerationMixin._get_stopping_criteria = new_get_stopping_criteria
|
||||
|
||||
def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model=""):
|
||||
global model
|
||||
global generator
|
||||
global torch
|
||||
global model_config
|
||||
global GPT2TokenizerFast
|
||||
print("Loading vars.model: {} vars.custmodpth: {}".format(vars.model, vars.custmodpth))
|
||||
vars.noai = False
|
||||
if not initial_load:
|
||||
set_aibusy(True)
|
||||
if vars.model != 'ReadOnly':
|
||||
emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(vars.model)}, broadcast=True)
|
||||
#Have to add a sleep so the server will send the emit for some reason
|
||||
time.sleep(0.1)
|
||||
if gpu_layers is not None:
|
||||
args.breakmodel_gpulayers = gpu_layers
|
||||
|
||||
#We need to wipe out the existing model and refresh the cuda cache
|
||||
model = None
|
||||
generator = None
|
||||
model_config = None
|
||||
try:
|
||||
torch.cuda.empty_cache()
|
||||
except:
|
||||
pass
|
||||
|
||||
#Reload our badwords
|
||||
vars.badwordsids = vars.badwordsids_default
|
||||
|
||||
#Let's set the GooseAI or OpenAI server URLs if that's applicable
|
||||
if online_model != "":
|
||||
if path.exists("settings/{}.settings".format(vars.model)):
|
||||
changed=False
|
||||
with open("settings/{}.settings".format(vars.model), "r") as file:
|
||||
# Check if API key exists
|
||||
js = json.load(file)
|
||||
if 'online_model' in js:
|
||||
if js['online_model'] != online_model:
|
||||
changed=True
|
||||
js['online_model'] = online_model
|
||||
else:
|
||||
changed=True
|
||||
js['online_model'] = online_model
|
||||
if changed:
|
||||
with open("settings/{}.settings".format(vars.model), "w") as file:
|
||||
file.write(json.dumps(js, indent=3))
|
||||
# Swap OAI Server if GooseAI was selected
|
||||
if(vars.model == "GooseAI"):
|
||||
vars.oaiengines = "https://api.goose.ai/v1/engines"
|
||||
vars.model = "OAI"
|
||||
args.configname = "GooseAI" + "/" + online_model
|
||||
else:
|
||||
args.configname = vars.model + "/" + online_model
|
||||
vars.oaiurl = vars.oaiengines + "/{0}/completions".format(online_model)
|
||||
|
||||
|
||||
# If transformers model was selected & GPU available, ask to use CPU or GPU
|
||||
if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||
vars.allowsp = True
|
||||
# Test for GPU support
|
||||
|
||||
# Make model path the same as the model name to make this consistent with the other loading method if it isn't a known model type
|
||||
# This code is not just a workaround for below, it is also used to make the behavior consistent with other loading methods - Henk717
|
||||
if(not vars.model in ["NeoCustom", "GPT2Custom"]):
|
||||
vars.custmodpth = vars.model
|
||||
elif(vars.model == "NeoCustom"):
|
||||
vars.model = os.path.basename(os.path.normpath(vars.custmodpth))
|
||||
|
||||
# Get the model_type from the config or assume a model type if it isn't present
|
||||
from transformers import AutoConfig
|
||||
if(os.path.isdir(vars.custmodpth.replace('/', '_'))):
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), revision=vars.revision, cache_dir="cache")
|
||||
vars.model_type = model_config.model_type
|
||||
except ValueError as e:
|
||||
vars.model_type = "not_found"
|
||||
elif(os.path.isdir("models/{}".format(vars.custmodpth.replace('/', '_')))):
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained("models/{}".format(vars.custmodpth.replace('/', '_')), revision=vars.revision, cache_dir="cache")
|
||||
vars.model_type = model_config.model_type
|
||||
except ValueError as e:
|
||||
vars.model_type = "not_found"
|
||||
else:
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
vars.model_type = model_config.model_type
|
||||
except ValueError as e:
|
||||
vars.model_type = "not_found"
|
||||
if(vars.model_type == "not_found" and vars.model == "NeoCustom"):
|
||||
vars.model_type = "gpt_neo"
|
||||
elif(vars.model_type == "not_found" and vars.model == "GPT2Custom"):
|
||||
vars.model_type = "gpt2"
|
||||
elif(vars.model_type == "not_found"):
|
||||
print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
|
||||
vars.model_type = "gpt_neo"
|
||||
|
||||
if(vars.model_type == "opt"):
|
||||
vars.badwordsids = vars.badwordsids_opt
|
||||
|
||||
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||
loadmodelsettings()
|
||||
loadsettings()
|
||||
print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="")
|
||||
vars.hascuda = torch.cuda.is_available()
|
||||
vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm", "opt") and not vars.nobreakmodel
|
||||
if(args.breakmodel is not None and args.breakmodel):
|
||||
print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).", file=sys.stderr)
|
||||
if(args.breakmodel_layers is not None):
|
||||
print("WARNING: --breakmodel_layers is deprecated. Use --breakmodel_gpulayers instead (see --help for details).", file=sys.stderr)
|
||||
if(args.model and vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers):
|
||||
print("WARNING: Model launched without the --breakmodel_gpulayers argument, defaulting to GPU only mode.", file=sys.stderr)
|
||||
vars.bmsupported = False
|
||||
if(not vars.bmsupported and (args.breakmodel_gpulayers is not None or args.breakmodel_layers is not None)):
|
||||
print("WARNING: This model does not support hybrid generation. --breakmodel_gpulayers will be ignored.", file=sys.stderr)
|
||||
if(vars.hascuda):
|
||||
print("{0}FOUND!{1}".format(colors.GREEN, colors.END))
|
||||
else:
|
||||
print("{0}NOT FOUND!{1}".format(colors.YELLOW, colors.END))
|
||||
|
||||
if args.model:
|
||||
if(vars.hascuda):
|
||||
genselected = True
|
||||
vars.usegpu = True
|
||||
vars.breakmodel = False
|
||||
if(vars.bmsupported):
|
||||
vars.usegpu = False
|
||||
vars.breakmodel = True
|
||||
if(args.cpu):
|
||||
vars.usegpu = False
|
||||
vars.breakmodel = False
|
||||
elif(vars.hascuda):
|
||||
if(vars.bmsupported):
|
||||
genselected = True
|
||||
vars.usegpu = False
|
||||
vars.breakmodel = True
|
||||
else:
|
||||
genselected = False
|
||||
else:
|
||||
genselected = False
|
||||
|
||||
if(vars.hascuda):
|
||||
if(use_gpu):
|
||||
if(vars.bmsupported):
|
||||
vars.breakmodel = True
|
||||
vars.usegpu = False
|
||||
genselected = True
|
||||
else:
|
||||
vars.breakmodel = False
|
||||
vars.usegpu = True
|
||||
genselected = True
|
||||
else:
|
||||
vars.breakmodel = False
|
||||
vars.usegpu = False
|
||||
genselected = True
|
||||
|
||||
# Ask for API key if InferKit was selected
|
||||
if(vars.model == "InferKit"):
|
||||
vars.apikey = vars.oaiapikey
|
||||
|
||||
# Swap OAI Server if GooseAI was selected
|
||||
if(vars.model == "GooseAI"):
|
||||
vars.oaiengines = "https://api.goose.ai/v1/engines"
|
||||
vars.model = "OAI"
|
||||
args.configname = "GooseAI"
|
||||
|
||||
# Ask for API key if OpenAI was selected
|
||||
if(vars.model == "OAI"):
|
||||
if not args.configname:
|
||||
args.configname = "OAI"
|
||||
|
||||
if(vars.model == "ReadOnly"):
|
||||
vars.noai = True
|
||||
|
||||
# Start transformers and create pipeline
|
||||
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||
if(not vars.noai):
|
||||
print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
|
||||
for m in ("GPTJModel", "XGLMModel"):
|
||||
try:
|
||||
globals()[m] = getattr(__import__("transformers"), m)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Lazy loader
|
||||
import torch_lazy_loader
|
||||
def get_lazy_load_callback(n_layers, convert_to_float16=True):
|
||||
if not vars.lazy_load:
|
||||
return
|
||||
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
if "breakmodel" in globals():
|
||||
gpu_blocks = breakmodel.gpu_blocks
|
||||
ram_blocks = ram_blocks = n_layers - sum(gpu_blocks)
|
||||
cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
|
||||
else:
|
||||
ram_blocks = gpu_blocks = cumulative_gpu_blocks = None
|
||||
|
||||
def lazy_load_callback(model_dict, f, **_):
|
||||
if lazy_load_callback.nested:
|
||||
return
|
||||
lazy_load_callback.nested = True
|
||||
|
||||
device_map = {}
|
||||
|
||||
for _key, spec in lazy_load_spec.get("layer_weights", {}).items():
|
||||
for layer in range(n_layers):
|
||||
key = _key.format(layer=layer)
|
||||
if key not in model_dict:
|
||||
continue
|
||||
device = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu" if not vars.hascuda or not vars.breakmodel or layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
|
||||
device_map[key] = device
|
||||
|
||||
for key, value in model_dict.items():
|
||||
if isinstance(value, torch_lazy_loader.LazyTensor) and key not in device_map:
|
||||
device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu"
|
||||
|
||||
if utils.num_shards is None or utils.current_shard == 0:
|
||||
if utils.num_shards is not None:
|
||||
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
|
||||
else:
|
||||
num_tensors = len(device_map)
|
||||
print(flush=True)
|
||||
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors", file=Send_to_socketio())
|
||||
|
||||
with zipfile.ZipFile(f, "r") as z:
|
||||
try:
|
||||
last_storage_key = None
|
||||
f = None
|
||||
current_offset = 0
|
||||
if utils.num_shards is not None:
|
||||
utils.current_shard += 1
|
||||
for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
|
||||
storage_key = model_dict[key].key
|
||||
if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset:
|
||||
last_storage_key = storage_key
|
||||
if isinstance(f, zipfile.ZipExtFile):
|
||||
f.close()
|
||||
f = z.open(f"archive/data/{storage_key}")
|
||||
current_offset = 0
|
||||
if current_offset != model_dict[key].seek_offset:
|
||||
f.read(model_dict[key].seek_offset - current_offset)
|
||||
current_offset = model_dict[key].seek_offset
|
||||
device = device_map[key]
|
||||
size = functools.reduce(lambda x, y: x * y, model_dict[key].shape, 1)
|
||||
dtype = model_dict[key].dtype
|
||||
nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
|
||||
#print(f"Transferring <{key}> to {'(CPU)' if device == 'cpu' else '[device ' + str(device) + ']'} ... ", end="", flush=True)
|
||||
model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
|
||||
if model_dict[key].dtype is torch.float32:
|
||||
vars.fp32_model = True
|
||||
if convert_to_float16 and vars.hascuda and (vars.breakmodel or vars.usegpu) and model_dict[key].dtype is torch.float32:
|
||||
model_dict[key] = model_dict[key].to(torch.float16)
|
||||
if not vars.usegpu and not vars.breakmodel and model_dict[key].dtype is torch.float16:
|
||||
model_dict[key] = model_dict[key].to(torch.float32)
|
||||
model_dict[key] = model_dict[key].to(device)
|
||||
#print("OK", flush=True)
|
||||
current_offset += nbytes
|
||||
utils.bar.update(1)
|
||||
finally:
|
||||
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
|
||||
utils.bar.close()
|
||||
utils.bar = None
|
||||
lazy_load_callback.nested = False
|
||||
if isinstance(f, zipfile.ZipExtFile):
|
||||
f.close()
|
||||
|
||||
lazy_load_callback.nested = False
|
||||
return lazy_load_callback
|
||||
|
||||
lazy_load_config_path = os.path.join("maps", vars.model_type + ".json")
|
||||
if(vars.lazy_load and "model_config" in globals() and os.path.isfile(lazy_load_config_path)):
|
||||
with open(lazy_load_config_path) as f:
|
||||
lazy_load_spec = json.load(f)
|
||||
|
||||
else:
|
||||
vars.lazy_load = False
|
||||
|
||||
|
||||
|
||||
def get_hidden_size_from_model(model):
|
||||
try:
|
||||
return int(model.model.decoder.project_in.in_features)
|
||||
@@ -2991,19 +3037,23 @@ def get_message(msg):
        # If we're on a custom line that we have selected a model for, the path variable will be in msg
        # so if that's missing we need to run the menu to show the model folders in the models folder
        if msg['data'] in ('NeoCustom', 'GPT2Custom') and 'path' not in msg:
-            sendModelSelection(menu=msg['data'])
        #elif msg['data'] in ('OAI', 'GooseAI'):
        #    vars.model = msg['data']
        #    get_oai_models()
        #    emit('from_server', {'cmd': 'hide_layer_bar'}, broadcast=True)
        #    emit('from_server', {'cmd': 'check_enable_model_load', 'model': vars.model}, broadcast=True)
+            if 'folder' not in msg:
+                folder = "./models"
+            else:
+                folder = msg['folder']
+            sendModelSelection(menu=msg['data'], folder=folder)
+        elif msg['data'] in ('NeoCustom', 'GPT2Custom'):
+            if check_if_dir_is_model(msg['path']):
+                vars.model = msg['data']
+                vars.custmodpth = msg['path']
+                get_model_info(msg['data'], directory=msg['path'])
+            else:
+                sendModelSelection(menu=msg['data'], folder=msg['path'])
        else:
            vars.model = msg['data']
            if 'path' in msg:
-                if msg['data'] == 'NeoCustom':
-                    get_model_info(vars.custmodpth, directory=msg['path'])
-                else:
-                    get_model_info(vars.model, directory=msg['path'])
+                vars.custmodpth = msg['path']
+                get_model_info(msg['data'], directory=msg['path'])
            else:
                get_model_info(vars.model)
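Roughly, the 'selectmodel' exchange handled above goes through these messages (a sketch with made-up paths; the keys match the handler here and the browser code further down):

    # 1. Menu click with no path: the server answers with a folder listing for ./models.
    {'cmd': 'selectmodel', 'data': 'NeoCustom'}

    # 2. Folder click: the browser sends that folder's full path back.
    {'cmd': 'selectmodel', 'data': 'NeoCustom', 'path': './models/gpt-neo-2.7B'}
    #    If check_if_dir_is_model() accepts the path, the server emits 'selected_model_info';
    #    otherwise it re-emits 'show_model_menu' for that folder.

    # 3. Breadcrumb click: the browser sends a 'folder' key instead of 'path'.
    {'cmd': 'selectmodel', 'data': 'NeoCustom', 'folder': './models'}
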
@@ -5685,6 +5735,7 @@ if __name__ == "__main__":
    print("{0}\nStarting webserver...{1}".format(colors.GREEN, colors.END), flush=True)

    general_startup()
+    patch_transformers()
    #show_select_model_list()
    if vars.model == "" or vars.model is None:
        vars.model = "ReadOnly"
@@ -5740,6 +5791,7 @@ if __name__ == "__main__":

    else:
        general_startup()
+        patch_transformers()
        #show_select_model_list()
        if vars.model == "" or vars.model is None:
            vars.model = "ReadOnly"
@@ -991,22 +991,44 @@ function hideUSPopup() {
}


-function buildLoadModelList(ar, menu) {
+function buildLoadModelList(ar, menu, breadcrumbs) {
    disableButtons([load_model_accept]);
    loadmodelcontent.html("");
    $("#loadmodellistbreadcrumbs").html("");
    var i;
    for(i=0; i<breadcrumbs.length; i++) {
        $("#loadmodellistbreadcrumbs").append("<button class=\"breadcrumbitem\" id='model_breadcrumbs"+i+"' name='"+ar[0][1]+"' value='"+breadcrumbs[i][0]+"'>"+breadcrumbs[i][1]+"</button><font color=white>\\</font>");
        $("#model_breadcrumbs"+i).off("click").on("click", (function () {
            return function () {
                socket.send({'cmd': 'selectmodel', 'data': $(this).attr("name"), 'folder': $(this).attr("value")});
                disableButtons([load_model_accept]);
            }
        })(i));
    }
    if (breadcrumbs.length > 0) {
        $("#loadmodellistbreadcrumbs").append("<hr size='1'>")
    }
    for(i=0; i<ar.length; i++) {
        var html
        html = "<div class=\"flex\">\
            <div class=\"loadlistpadding\"></div>"
        //if the menu item is a link to another menu
        if(ar[i][3]) {
            html = html + "<span class=\"loadlisticon loadmodellisticon-folder oi oi-folder allowed\" aria-hidden=\"true\"></span>"
        } else {
            //this is a model
            html = html + "<div class=\"loadlistpadding\"></div>"
        }
        if (Array.isArray(ar[i][0])) {
            full_path = ar[i][0][0];
            folder = ar[i][0][1];
        } else {
            full_path = "";
            folder = ar[i][0];
        }
        html = html + "<div class=\"loadlistpadding\"></div>\
-            <div class=\"loadlistitem\" id=\"loadmodel"+i+"\" name=\""+ar[i][1]+"\" pretty_name=\""+ar[i][0]+"\">\
-                <div>"+ar[i][0]+"</div>\
+            <div class=\"loadlistitem\" id=\"loadmodel"+i+"\" name=\""+ar[i][1]+"\" pretty_name=\""+full_path+"\">\
+                <div>"+folder+"</div>\
                <div class=\"flex-push-right\">"+ar[i][2]+"</div>\
            </div>\
        </div>"
@@ -1020,7 +1042,7 @@ function buildLoadModelList(ar, menu) {
            }
        })(i));
    //If we're in the custom load menu (we need to send the path data back in that case)
-    } else if(menu == 'custom') {
+    } else if(['NeoCustom', 'GPT2Custom'].includes(menu)) {
        $("#loadmodel"+i).off("click").on("click", (function () {
            return function () {
                socket.send({'cmd': 'selectmodel', 'data': $(this).attr("name"), 'path': $(this).attr("pretty_name")});
@@ -2472,11 +2494,12 @@ $(document).ready(function(){
                debug_area.addClass("hidden");
            }
        } else if(msg.cmd == 'show_model_menu') {
+            console.log(msg)
            $("#use_gpu_div").addClass("hidden");
            $("#modelkey").addClass("hidden");
            $("#modellayers").addClass("hidden");
            $("#oaimodel").addClass("hidden")
-            buildLoadModelList(msg.data, msg.menu);
+            buildLoadModelList(msg.data, msg.menu, msg.breadcrumbs);
        } else if(msg.cmd == 'selected_model_info') {
            enableButtons([load_model_accept]);
            $("#oaimodel").addClass("hidden")
@@ -1035,7 +1035,7 @@ body.connected .statusiconlabel, .statusiconlabel.always-available {
}

.loadlistitem {
-    padding: 5px 10px 5px 10px;
+    padding: 0px 0px 0px 0px;
    display: flex;
    flex-grow: 1;
    color: #ffffff;
@@ -1051,6 +1051,28 @@ body.connected .statusiconlabel, .statusiconlabel.always-available {
    background-color: #688f1f;
}

.breadcrumbitem {
    padding: 5px 10px 5px 10px;
    color: #ffffff;
    background-color: transparent;
    border: none;

    -moz-transition: background-color 0.25s ease-in;
    -o-transition: background-color 0.25s ease-in;
    -webkit-transition: background-color 0.25s ease-in;
    transition: background-color 0.25s ease-in;
}

.breadcrumbitem:hover {
    cursor: pointer;
    background-color: #688f1f;
}

hr {
    padding: 0px;
    margin: 0px;
}

.loadlistpadding {
    padding-right: 10px;
}
@@ -279,8 +279,8 @@
    <div class="popuptitlebar">
        <div class="popuptitletext">Select A Model To Load</div>
    </div>
    <div class="loadmodellistheader">
        <div>Model</div>
    <div id="loadmodellistbreadcrumbs">

    </div>
    <div id="loadmodellistcontent" style="overflow: scroll; height: 300px;">
    </div>
utils.py (2 changed lines)
@@ -149,7 +149,7 @@ def decodenewlines(txt):
# Returns number of layers given an HF model config
#==================================================================#
def num_layers(config):
-    return config.num_layers if hasattr(config, "num_layers") else config.n_layer if hasattr(config, "n_layer") else config.num_hidden_layers
+    return config.num_layers if hasattr(config, "num_layers") else config.n_layer if hasattr(config, "n_layer") else config.num_hidden_layers if hasattr(config, 'num_hidden_layers') else None

#==================================================================#
# Downloads huggingface checkpoints using aria2c if possible
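A short illustration of what the widened fallback chain above buys (the bare objects below are stand-ins for real Hugging Face config classes, which expose the layer count under different attribute names):

    from types import SimpleNamespace

    def num_layers(config):
        return config.num_layers if hasattr(config, "num_layers") else config.n_layer if hasattr(config, "n_layer") else config.num_hidden_layers if hasattr(config, 'num_hidden_layers') else None

    print(num_layers(SimpleNamespace(n_layer=28)))            # 28   (GPT-2 style configs)
    print(num_layers(SimpleNamespace(num_hidden_layers=32)))  # 32   (OPT and most newer configs)
    print(num_layers(SimpleNamespace()))                      # None instead of an AttributeError
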