Model: Port rest of models over

Generation's still broken, but it's a start
somebody
2023-02-25 16:05:56 -06:00
parent f8c4158ebc
commit 6b4905de30
3 changed files with 1987 additions and 1760 deletions

File diff suppressed because it is too large

model.py (2115 changes)

File diff suppressed because it is too large

@@ -8,6 +8,7 @@ from urllib.error import HTTPError
 import requests
 import requests.adapters
 import time
+import breakmodel
 from transformers import __version__ as transformers_version
 from transformers import PreTrainedModel
 import packaging.version
@@ -637,6 +638,7 @@ def get_missing_module_names(model: PreTrainedModel, names: List[str]) -> List[str]:
 class UIProgressBarFile(object):
     """Write TQDM progress to the UI."""
     def write(self, bar):
         bar = bar.replace("\r", "").replace("\n", "").replace(chr(0), "")
         if bar != "" and [ord(num) for num in bar] != [27, 91, 65]:  # No idea why we're getting the 27, 91, 65 character set, just killing it so we can move on
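
Aside: the "27, 91, 65" bytes filtered above are almost certainly the ANSI
"cursor up one line" escape (ESC [ A) that tqdm emits while redrawing
multi-line progress bars. A minimal sketch to confirm the byte values
(illustrative only, not code from this commit):

    # ESC (27) + "[" (91) + "A" (65) is the ANSI cursor-up control
    # sequence; tqdm writes it when repositioning nested bars.
    CURSOR_UP = "\x1b[A"
    assert [ord(ch) for ch in CURSOR_UP] == [27, 91, 65]
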
@@ -650,3 +652,31 @@ class UIProgressBarFile(object):
     def flush(self):
         pass
+
+
+def get_auxilary_device():
+    """Get the device that auxiliary tensors like inputs should be stored on."""
+    # NOTE: TPU isn't a torch device, so TPU stuff gets sent to CPU.
+    if koboldai_vars.hascuda and koboldai_vars.usegpu:
+        return koboldai_vars.gpu_device
+    elif koboldai_vars.hascuda and koboldai_vars.breakmodel:
+        return breakmodel.primary_device
+    return "cpu"
+
+
+#==================================================================#
+# Strips submitted text from the text returned by the AI
+#==================================================================#
+def getnewcontent(txt, tokenizer):
+    # If the submitted context was blank, then everything is new
+    if koboldai_vars.lastctx == "":
+        return txt
+
+    # Tokenize the last context and the generated content
+    ctxtokens = tokenizer.encode(encodenewlines(koboldai_vars.lastctx), max_length=int(2e9), truncation=True)
+    txttokens = tokenizer.encode(encodenewlines(txt), max_length=int(2e9), truncation=True)
+    dif = (len(txttokens) - len(ctxtokens)) * -1
+
+    # Remove the context from the returned text
+    newtokens = txttokens[dif:]
+    return decodenewlines(tokenizer.decode(newtokens))
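
A rough sketch of how get_auxilary_device would be called when preparing
inputs for generation. The tokenizer, model, and prompt names here are
assumed for illustration and are not part of this commit:

    # Illustrative usage only; `tokenizer`, `model`, and `prompt` are
    # assumed to exist in scope.
    device = get_auxilary_device()

    # Inputs are moved to the chosen device before generation: the single
    # GPU, breakmodel.primary_device under breakmodel, or "cpu" otherwise
    # (including the TPU case noted in the function).
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(input_ids)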
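
To see why the negative-index slice in getnewcontent drops the submitted
context, consider a toy example where whitespace-split "tokens" stand in
for real tokenizer output (purely illustrative):

    # Last context: 4 tokens; generated text: context + 4 new tokens.
    ctxtokens = ["You", "enter", "the", "cave"]
    txttokens = ["You", "enter", "the", "cave", "and", "light", "a", "torch"]

    # dif is minus the number of newly generated tokens...
    dif = (len(txttokens) - len(ctxtokens)) * -1   # -> -4

    # ...so txttokens[dif:] keeps exactly the tokens past the context.
    assert txttokens[dif:] == ["and", "light", "a", "torch"]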