Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Merge commit 'refs/pull/179/head' of https://github.com/ebolam/KoboldAI into UI2
aiserver.py (114 changes)
@@ -1,8 +1,8 @@
#!/usr/bin/python3
#==================================================================#
# KoboldAI
# Version: 1.18.1
# By: KoboldAIDev and the KoboldAI Community
# Version: 1.19.0
# By: The KoboldAI Community
#==================================================================#

# External packages
@@ -1438,6 +1438,7 @@ def get_model_info(model, directory=""):
show_online_model_select=False
gpu_count = torch.cuda.device_count()
gpu_names = []
send_horde_models = False
for i in range(gpu_count):
gpu_names.append(torch.cuda.get_device_name(i))
if model in ['Colab', 'API']:
@@ -1458,9 +1459,10 @@ def get_model_info(model, directory=""):
key_value = js["apikey"]
elif 'oaiapikey' in js and js['oaiapikey'] != "":
key_value = js["oaiapikey"]
if 'oaiurl' in js and js['oaiurl'] != "":
default_url = js['oaiurl']
get_cluster_models({'model': model, 'key': key_value, 'url': default_url})
if 'url' in js and js['url'] != "":
url = js['url']
if key_value != "":
send_horde_models = True
elif model in [x[1] for x in model_menu['apilist']]:
show_online_model_select=True
if path.exists("settings/{}.v2_settings".format(model)):
@@ -1519,6 +1521,10 @@ def get_model_info(model, directory=""):
'disk_break_value': disk_blocks, 'disk_break': utils.HAS_ACCELERATE,
'break_values': break_values, 'gpu_count': gpu_count,
'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select})
if send_horde_models:
get_cluster_models({'key': key_value, 'url': default_url})
elif key_value != "" and model in [x[1] for x in model_menu['apilist']] and model != 'CLUSTER':
get_oai_models(key_value)
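The two hunks above rework get_model_info() so the Horde model list is no longer fetched in the middle of reading the settings file: the key and URL are recorded, a send_horde_models flag is set, and get_cluster_models() runs only after the model info has been emitted. A minimal sketch of that deferral pattern, assuming a hypothetical fetch_cluster_models() helper in place of the real get_cluster_models() and emit calls:

    import json
    from pathlib import Path

    def fetch_cluster_models(key: str, url: str) -> None:
        # Placeholder for the real get_cluster_models() call that queries the
        # Horde/cluster endpoint for its model list.
        print(f"querying {url} with key {key!r}")

    def get_model_info(model: str, settings_dir: str = "settings") -> None:
        key_value = ""
        default_url = ""
        send_horde_models = False

        # Read any saved credentials for this model from its settings file.
        settings_path = Path(settings_dir) / f"{model}.v2_settings"
        if settings_path.exists():
            js = json.loads(settings_path.read_text())
            if js.get("apikey"):
                key_value = js["apikey"]
            elif js.get("oaiapikey"):
                key_value = js["oaiapikey"]
            if js.get("url"):
                default_url = js["url"]
            # Defer the network call: just remember that it is needed.
            if key_value != "":
                send_horde_models = True

        # ... emit the collected model info to the UI here ...

        # Only after the info has been sent do we hit the cluster endpoint.
        if send_horde_models:
            fetch_cluster_models(key_value, default_url)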
@@ -1697,6 +1703,7 @@ def get_cluster_models(msg):
js={}
with open(get_config_filename(model), "w") as file:
js["apikey"] = koboldai_vars.oaiapikey
js["url"] = url
file.write(json.dumps(js, indent=3))

logger.init_ok("KAI Horde Models", status="OK")
@@ -2130,6 +2137,8 @@ def patch_transformers():
if not koboldai_vars.output_streaming:
return False

if koboldai_vars.chatmode:
return False
koboldai_vars.actions.stream_tokens([utils.decodenewlines(tokenizer.decode(x[-1])) for x in input_ids])
return False
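The patch_transformers() hunk adds an early return so token streaming is skipped in chat mode; the surrounding callback always returns False and only streams the newest decoded token as a side effect. A rough sketch of that callback shape, assuming a Hugging Face style tokenizer object and a hypothetical stream_to_ui() sink in place of koboldai_vars.actions.stream_tokens():

    from typing import List

    def stream_to_ui(tokens: List[str]) -> None:
        # Placeholder for the real stream_tokens() emit to the browser.
        print("".join(tokens), end="", flush=True)

    class TokenStreamer:
        """Criteria-style callback: never stops generation, only streams tokens."""

        def __init__(self, tokenizer, output_streaming: bool = True, chatmode: bool = False):
            self.tokenizer = tokenizer
            self.output_streaming = output_streaming
            self.chatmode = chatmode

        def __call__(self, input_ids, scores, **kwargs) -> bool:
            # Respect the user toggle and skip streaming entirely in chat mode,
            # mirroring the early returns added in the hunk above.
            if not self.output_streaming:
                return False
            if self.chatmode:
                return False
            # Decode only the newest token of every sequence in the batch.
            stream_to_ui([self.tokenizer.decode(seq[-1]) for seq in input_ids])
            return False  # never request that generation stop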
@@ -2683,15 +2692,14 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
if(os.path.isdir(koboldai_vars.custmodpth)):
try:
tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
try:
tokenizer = GPT2TokenizerFast.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
except Exception as e:
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
try:
model = AutoModelForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
except Exception as e:
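Both the old and new code in this hunk walk the same fallback chain when loading a tokenizer for a local model directory: the fast AutoTokenizer, then the slow variant, then GPT2TokenizerFast for that path, and finally the stock "gpt2" tokenizer. A condensed sketch of that chain (not the repository's exact control flow), assuming the transformers package is installed:

    from transformers import AutoTokenizer, GPT2TokenizerFast

    def load_tokenizer_with_fallbacks(model_path: str, revision=None, cache_dir="cache"):
        # Each loader is tried in order; the first one that succeeds wins.
        loaders = [
            lambda: AutoTokenizer.from_pretrained(model_path, revision=revision, cache_dir=cache_dir),
            lambda: AutoTokenizer.from_pretrained(model_path, revision=revision, cache_dir=cache_dir, use_fast=False),
            lambda: GPT2TokenizerFast.from_pretrained(model_path, revision=revision, cache_dir=cache_dir),
            # Last resort: fall back to the stock GPT-2 tokenizer.
            lambda: GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir=cache_dir),
        ]
        last_error = None
        for loader in loaders:
            try:
                return loader()
            except Exception as e:
                last_error = e
        raise last_error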
@@ -2701,15 +2709,14 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
elif(os.path.isdir("models/{}".format(koboldai_vars.model.replace('/', '_')))):
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
try:
tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
except Exception as e:
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
try:
model = AutoModelForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
except Exception as e:
@@ -2732,15 +2739,14 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
try:
tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache")
tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
try:
tokenizer = GPT2TokenizerFast.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache")
except Exception as e:
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
try:
model = AutoModelForCausalLM.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
except Exception as e:
@@ -2772,7 +2778,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
map_data = json.load(f)
filenames = set(map_data["weight_map"].values())
# Save the pytorch_model.bin.index.json of a sharded model
shutil.move(utils.from_pretrained_index_filename, os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME))
shutil.move(os.path.realpath(utils.from_pretrained_index_filename), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME))
# Then save the pytorch_model-#####-of-#####.bin files
for filename in filenames:
shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, filename, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), filename))
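The change in this hunk resolves the downloaded index file with os.path.realpath() before moving it, so that when huggingface_hub stores the file as a symlink into its cache the real file is moved rather than the link. A small illustrative helper, assuming a hypothetical destination directory:

    import os
    import shutil

    def move_resolved(src: str, dst_dir: str) -> str:
        """Move the real file behind src (following symlinks) into dst_dir."""
        os.makedirs(dst_dir, exist_ok=True)
        real_src = os.path.realpath(src)  # resolve hf_hub cache symlinks
        dst = os.path.join(dst_dir, os.path.basename(src))
        shutil.move(real_src, dst)
        return dst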
@@ -3042,7 +3048,7 @@ def index():
if args.no_ui:
return redirect('/api/latest')
else:
return render_template('index.html', hide_ai_menu=args.noaimenu, flaskwebgui=koboldai_vars.flaskwebgui)
return render_template('index.html', hide_ai_menu=args.noaimenu)
@app.route('/api', strict_slashes=False)
def api():
return redirect('/api/latest')
@@ -6601,14 +6607,14 @@ def checkworldinfo(txt, allowed_entries=None, allowed_folders=None, force_use_tx
# Remove leading/trailing spaces if the option is enabled
if(koboldai_vars.wirmvwhtsp):
ky = k.strip()
if ky in txt:
if ky.lower() in txt.lower():
if wi.get("selective", False) and len(keys_secondary):
found = False
for ks in keys_secondary:
ksy = ks
if(koboldai_vars.wirmvwhtsp):
ksy = ks.strip()
if ksy in txt:
if ksy.lower() in txt.lower():
wimem = wimem + wi["content"] + "\n"
found_entries.add(id(wi))
found = True
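The checkworldinfo() hunk switches both the primary and secondary key checks from exact substring tests to case-insensitive ones (ky.lower() in txt.lower()). A reduced sketch of that matching logic, assuming world-info entries shaped like dicts with key, keysecondary, selective and content fields, and a strip_whitespace flag standing in for koboldai_vars.wirmvwhtsp:

    from typing import Dict, List

    def match_world_info(txt: str, entries: List[Dict], strip_whitespace: bool = True) -> str:
        """Collect the content of every entry whose keys appear in txt, ignoring case."""
        haystack = txt.lower()
        wimem = ""
        for wi in entries:
            for k in wi.get("key", []):
                ky = k.strip() if strip_whitespace else k
                if ky.lower() not in haystack:
                    continue
                secondary = wi.get("keysecondary", [])
                if wi.get("selective", False) and len(secondary):
                    # Selective entries also need one of their secondary keys to match.
                    found = False
                    for ks in secondary:
                        ksy = ks.strip() if strip_whitespace else ks
                        if ksy.lower() in haystack:
                            found = True
                            break
                    if not found:
                        continue
                wimem += wi["content"] + "\n"
                break  # one match per entry is enough
        return wimem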
@@ -8434,29 +8440,6 @@ def socket_io_relay(queue, socketio):
time.sleep(0.2)


#==================================================================#
# Event triggered when program errors out
#==================================================================#
def my_except_hook(exctype, value, traceback):
if koboldai_vars.debug:
print("sending error to clients")
socketio.emit("error", "{}: {}".format(exctype, value), broadcast=True, room="UI_2")
sys.__excepthook__(exctype, value, traceback)
sys.excepthook = my_except_hook

from werkzeug.exceptions import HTTPException

@app.errorhandler(Exception)
def handle_exception(e):
# pass through HTTP errors
if isinstance(e, HTTPException):
return e

# now you're handling non-HTTP exceptions only
if koboldai_vars.debug:
print("sending error to clients")
socketio.emit("error", "{}: {}".format(e.message, e.args), broadcast=True, room="UI_2")
return render_template("500_generic.html", e=e), 500


#==================================================================#
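This hunk drops the sys.excepthook and the Flask @app.errorhandler that forwarded uncaught exceptions to UI 2 clients over Socket.IO. A minimal sketch of that forwarding pattern, assuming a Flask app with Flask-SocketIO and a simple debug flag; it formats the exception type and message directly rather than the e.message attribute, which plain Python 3 exceptions do not define:

    import sys
    from flask import Flask, render_template
    from flask_socketio import SocketIO
    from werkzeug.exceptions import HTTPException

    app = Flask(__name__)
    socketio = SocketIO(app)
    DEBUG = True

    def my_except_hook(exctype, value, traceback):
        # Forward uncaught exceptions raised outside a request to UI clients.
        if DEBUG:
            print("sending error to clients")
        socketio.emit("error", "{}: {}".format(exctype, value), room="UI_2")
        sys.__excepthook__(exctype, value, traceback)

    sys.excepthook = my_except_hook

    @app.errorhandler(Exception)
    def handle_exception(e):
        # Let normal HTTP errors (404, 405, ...) pass through untouched.
        if isinstance(e, HTTPException):
            return e
        if DEBUG:
            print("sending error to clients")
        socketio.emit("error", "{}: {}".format(type(e).__name__, e), room="UI_2")
        return render_template("500_generic.html", e=e), 500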
@@ -9445,7 +9428,7 @@ def post_generate(body: GenerationInputSchema):
schema: GenerationInputSchema
example:
prompt: |-2
Explosions of suspicious origin occur at AMNAT satellite-receiver stations from Turkey to Labrador as three high-level Canadian defense ministers vanish and then a couple of days later are photographed at a Volgograd bistro hoisting shots of Stolichnaya with Slavic bimbos on their knee.
Niko the kobold stalked carefully down the alley, his small scaly figure obscured by a dusky cloak that fluttered lightly in the cold winter breeze.
top_p: 0.9
temperature: 0.5
responses:
@@ -9457,8 +9440,7 @@ def post_generate(body: GenerationInputSchema):
example:
results:
- text: |-2
It is later established that all of the cabinet members have died of old age.
MEGAMATRIX becomes involved in the growing number of mass abductions and kidnappings. Many disappearances occur along highways in western Canada, usually when traffic has come to a standstill because of a stalled truck or snowstorm. One or two abducted individuals will be released within a day or so but never
Holding up his tail to keep it from dragging in the dirty snow that covered the cobblestone, he waited patiently for the butcher to turn his attention from his stall so that he could pilfer his next meal: a tender-looking chicken.
{api_validation_error_response}
{api_not_implemented_response}
{api_server_busy_response}
@@ -11829,26 +11811,26 @@ def run():
else:
if args.unblock:
if not args.no_ui:
import webbrowser
webbrowser.open_new('http://localhost:{0}'.format(port))
try:
import webbrowser
webbrowser.open_new('http://localhost:{0}'.format(port))
except:
pass
logger.init_ok("Webserver", status="OK")
logger.message(f"Webserver started! You may now connect with a browser at http://127.0.0.1:{port}")
koboldai_vars.serverstarted = True
socketio.run(app, port=port, host='0.0.0.0')
else:
try:
from flaskwebgui import FlaskUI
koboldai_vars.serverstarted = True
koboldai_vars.flaskwebgui = True
FlaskUI(app, socketio=socketio, start_server="flask-socketio", maximized=True, close_server_on_exit=True).run()
except:
if not args.no_ui:
if not args.no_ui:
try:
import webbrowser
webbrowser.open_new('http://localhost:{0}'.format(port))
logger.init_ok("Webserver", status="OK")
logger.message(f"Webserver started! You may now connect with a browser at http://127.0.0.1:{port}")
koboldai_vars.serverstarted = True
socketio.run(app, port=port)
except:
pass
logger.init_ok("Webserver", status="OK")
logger.message(f"Webserver started! You may now connect with a browser at http://127.0.0.1:{port}")
koboldai_vars.serverstarted = True
socketio.run(app, port=port)
logger.init("Webserver", status="Closed")

if __name__ == "__main__":
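The run() hunk wraps each webbrowser.open_new() call in try/except so a failure to launch a browser (for example on a headless server) no longer prevents the web server from starting. A small sketch of that guard, with a hypothetical helper name:

    def open_browser_if_possible(port: int, no_ui: bool = False) -> None:
        """Best-effort attempt to pop a browser tab; never fatal if it fails."""
        if no_ui:
            return
        try:
            import webbrowser
            webbrowser.open_new('http://localhost:{0}'.format(port))
        except Exception:
            # Headless or misconfigured environments simply skip the browser.
            pass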