From cb577bd491c09957a68c947ce8159395651559ed Mon Sep 17 00:00:00 2001
From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com>
Date: Fri, 1 Sep 2023 20:38:58 -0500
Subject: [PATCH 1/8] Create api_example.py

---
 api_example.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 api_example.py

diff --git a/api_example.py b/api_example.py
new file mode 100644
index 00000000..8c5baa2a
--- /dev/null
+++ b/api_example.py
@@ -0,0 +1,55 @@
+import requests
+
+user = "User:"
+bot = "Bot:"
+ENDPOINT = "http://127.0.0.1:5000"
+conversation_history = [] # using a list to update conversation history is more memory efficient than constantly updating a string
+
+def get_prompt(user_msg):
+    return {
+        "prompt": f"{user_msg}",
+        "use_story": False, #Needs to be set in KoboldAI webUI
+        "use_memory": False, #Needs to be set in KoboldAI webUI
+        "use_authors_note": False, #Needs to be set in KoboldAI webUI
+        "use_world_info": False, #Needs to be set in KoboldAI webUI
+        "max_context_length": 2048,
+        "max_length": 120,
+        "rep_pen": 1.0,
+        "rep_pen_range": 2048,
+        "rep_pen_slope": 0.7,
+        "temperature": 0.7,
+        "tfs": 0.97,
+        "top_a": 0.8,
+        "top_k": 0,
+        "top_p": 0.5,
+        "typical": 0.19,
+        "sampler_order": [6,0,1,3,4,2,5],
+        "singleline": False,
+        "sampler_seed": 69420, # Use specific seed for text generation?
+        "sampler_full_determinism": False, # Always give same output with same settings?
+        "frmttriminc": False, #Trim incomplete sentences
+        "frmtrmblln": False, #Remove blank lines
+        "stop_sequence": ["\n\n\n\n\n", f"{user}"]
+    }
+
+while True:
+    try:
+        user_message = input(f"{user}")
+
+        if len(user_message.strip()) < 1:
+            print(f"{bot}Please provide a valid input.")
+            continue
+
+        fullmsg = f"{conversation_history[-1] if conversation_history else ''}{user} {user_message}\n{bot} " # Add all of conversation history if it exists and add User and Bot names
+        prompt = get_prompt(fullmsg) # Process prompt into KoboldAI API format
+        response = requests.post(f"{ENDPOINT}/api/v1/generate", json=prompt) # Send prompt to API
+
+        if response.status_code == 200:
+            results = response.json()['results'] # Set results as JSON response
+            text = results[0]['text'] # inside results, look in first group for section labeled 'text'
+            response_text = text.split('\n')[0].replace("  ", " ") # Optional, keep only the text before a new line, and replace double spaces with normal ones
+            conversation_history.append(f"{fullmsg}{response_text}\n") # Add the response to the end of your conversation history
+            print(f"{bot} {response_text}")
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
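A quick orientation before the next patch: stripped to its essentials, the round trip this script performs looks like the sketch below. This is a sketch rather than part of the patch series; it assumes a KoboldAI-compatible server (such as koboldcpp) is already listening on 127.0.0.1:5000, and the prompt text is made up, but the endpoint path and the response shape match what api_example.py above relies on.

    import requests

    # Assumes a local KoboldAI-compatible server on port 5000 (adjust as needed).
    payload = {"prompt": "User: Hello!\nBot:", "max_length": 32}
    resp = requests.post("http://127.0.0.1:5000/api/v1/generate", json=payload)
    resp.raise_for_status()
    # The API answers with JSON of the form {"results": [{"text": "..."}]}
    print(resp.json()["results"][0]["text"])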
From 4c03402e335c8cb7f27315412a8b0e648b93a41c Mon Sep 17 00:00:00 2001
From: Henk
Date: Sat, 2 Sep 2023 03:41:01 +0200
Subject: [PATCH 2/8] API Example Polish and Fixes

---
 api_example.py | 58 ++++++++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/api_example.py b/api_example.py
index 8c5baa2a..aa35f883 100644
--- a/api_example.py
+++ b/api_example.py
@@ -2,54 +2,56 @@ import requests
 
 user = "User:"
 bot = "Bot:"
-ENDPOINT = "http://127.0.0.1:5000"
+ENDPOINT = "http://localhost:5000/api"
 conversation_history = [] # using a list to update conversation history is more memory efficient than constantly updating a string
 
 def get_prompt(user_msg):
     return {
         "prompt": f"{user_msg}",
-        "use_story": False, #Needs to be set in KoboldAI webUI
-        "use_memory": False, #Needs to be set in KoboldAI webUI
-        "use_authors_note": False, #Needs to be set in KoboldAI webUI
-        "use_world_info": False, #Needs to be set in KoboldAI webUI
-        "max_context_length": 2048,
-        "max_length": 120,
-        "rep_pen": 1.0,
-        "rep_pen_range": 2048,
-        "rep_pen_slope": 0.7,
-        "temperature": 0.7,
-        "tfs": 0.97,
-        "top_a": 0.8,
-        "top_k": 0,
-        "top_p": 0.5,
-        "typical": 0.19,
-        "sampler_order": [6,0,1,3,4,2,5],
-        "singleline": False,
-        "sampler_seed": 69420, # Use specific seed for text generation?
-        "sampler_full_determinism": False, # Always give same output with same settings?
-        "frmttriminc": False, #Trim incomplete sentences
-        "frmtrmblln": False, #Remove blank lines
-        "stop_sequence": ["\n\n\n\n\n", f"{user}"]
+        "use_story": "False", # Use the story from the KoboldAI UI, can be managed using other API calls (See /api for the documentation)
+        "use_memory": "False", # Use the memory from the KoboldAI UI, can be managed using other API calls (See /api for the documentation)
+        "use_authors_note": "False", # Use the author's notes from the KoboldAI UI, can be managed using other API calls (See /api for the documentation)
+        "use_world_info": "False", # Use the World Info from the KoboldAI UI, can be managed using other API calls (See /api for the documentation)
+        "max_context_length": 2048, # How much of the prompt will we submit to the AI generator? (Prevents AI / memory overloading)
+        "max_length": 100, # How long should the response be?
+        "rep_pen": 1.1, # Prevent the AI from repeating itself
+        "rep_pen_range": 2048, # The range over which to apply the repetition penalty
+        "rep_pen_slope": 0.7, # This number determines the strength of the repetition penalty over time
+        "temperature": 0.5, # How random should the AI be? At a low value we pick the most probable token, high values are a dice roll
+        "tfs": 0.97, # Tail free sampling, https://www.trentonbricken.com/Tail-Free-Sampling/
+        "top_a": 0.0, # Top A sampling, https://github.com/BlinkDL/RWKV-LM/tree/4cb363e5aa31978d801a47bc89d28e927ab6912e#the-top-a-sampling-method
+        "top_k": 0, # Keep the X most probable tokens
+        "top_p": 0.9, # Top P sampling / Nucleus Sampling, https://arxiv.org/pdf/1904.09751.pdf
+        "typical": 1.0, # Typical Sampling, https://arxiv.org/pdf/2202.00666.pdf
+        "sampler_order": [6,0,1,3,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the numbers mean.
+        "stop_sequence": [f"{user}"], # When should the AI stop generating? In this example we stop when it tries to speak on behalf of the user.
+        #"sampler_seed": 1337, # Use specific seed for text generation? This helps with consistency across tests.
+        "singleline": "False", # Only return a response that fits on a single line, this can help with chatbots but also makes them less verbose
+        "sampler_full_determinism": "False", # Always return the same result for the same query, best used with a static seed
+        "frmttriminc": "True", # Trim incomplete sentences; prevents unfinished sentences but can interfere with coding and other non-English output
+        "frmtrmblln": "False", # Remove blank lines
+        "quiet": "False" # Don't print what you are doing in the KoboldAI console, helps with user privacy
     }
 
 while True:
     try:
-        user_message = input(f"{user}")
+        user_message = input(f"{user} ")
 
         if len(user_message.strip()) < 1:
             print(f"{bot}Please provide a valid input.")
             continue
 
-        fullmsg = f"{conversation_history[-1] if conversation_history else ''}{user} {user_message}\n{bot} " # Add all of conversation history if it exists and add User and Bot names
+        fullmsg = f"{conversation_history[-1] if conversation_history else ''}{user} {user_message}\n{bot}" # Add all of conversation history if it exists and add User and Bot names
         prompt = get_prompt(fullmsg) # Process prompt into KoboldAI API format
-        response = requests.post(f"{ENDPOINT}/api/v1/generate", json=prompt) # Send prompt to API
-
+        response = requests.post(f"{ENDPOINT}/v1/generate", json=prompt) # Send prompt to API
         if response.status_code == 200:
             results = response.json()['results'] # Set results as JSON response
             text = results[0]['text'] # inside results, look in first group for section labeled 'text'
             response_text = text.split('\n')[0].replace("  ", " ") # Optional, keep only the text before a new line, and replace double spaces with normal ones
             conversation_history.append(f"{fullmsg}{response_text}\n") # Add the response to the end of your conversation history
             print(f"{bot} {response_text}")
+        else:
+            print(response)
 
     except Exception as e:
-        print(f"An error occurred: {e}")
+        print(f"An error occurred: {e}")
\ No newline at end of file
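One behavior of the polished script worth a concrete illustration: even though stop_sequence asks the server to stop once the model starts speaking as the user, the returned text can still contain a newline followed by the start of a fabricated turn, which is why the script keeps only the first line and collapses double spaces. A minimal sketch of that post-processing, with a hypothetical completion string standing in for real model output:

    # "raw" is a hypothetical completion; real output depends on the loaded model.
    raw = "Sure, I can help with that.\nUser: thanks"
    response_text = raw.split('\n')[0].replace("  ", " ")
    print(response_text)  # Sure, I can help with that.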
+ "singleline": "False", # Only return a response that fits on a single line, this can help with chatbots but also makes them less verbose + "sampler_full_determinism": "False", # Always return the same result for the same query, best used with a static seed + "frmttriminc": "True", # Trim incomplete sentences, prevents sentences that are unfinished but can interfere with coding and other non english sentences + "frmtrmblln": "False", #Remove blank lines + "quiet": "False" # Don't print what you are doing in the KoboldAI console, helps with user privacy } while True: try: - user_message = input(f"{user}") + user_message = input(f"{user} ") if len(user_message.strip()) < 1: print(f"{bot}Please provide a valid input.") continue - fullmsg = f"{conversation_history[-1] if conversation_history else ''}{user} {user_message}\n{bot} " # Add all of conversation history if it exists and add User and Bot names + fullmsg = f"{conversation_history[-1] if conversation_history else ''}{user} {user_message}\n{bot}" # Add all of conversation history if it exists and add User and Bot names prompt = get_prompt(fullmsg) # Process prompt into KoboldAI API format - response = requests.post(f"{ENDPOINT}/api/v1/generate", json=prompt) # Send prompt to API - + response = requests.post(f"{ENDPOINT}/v1/generate", json=prompt) # Send prompt to API if response.status_code == 200: results = response.json()['results'] # Set results as JSON response text = results[0]['text'] # inside results, look in first group for section labeled 'text' response_text = text.split('\n')[0].replace(" ", " ") # Optional, keep only the text before a new line, and replace double spaces with normal ones conversation_history.append(f"{fullmsg}{response_text}\n") # Add the response to the end of your conversation history + else: + print(response) print(f"{bot} {response_text}") except Exception as e: - print(f"An error occurred: {e}") + print(f"An error occurred: {e}") \ No newline at end of file From 966b6f8a2e78b835601ee974cbad1739d6fa9e5b Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 3 Sep 2023 11:40:28 +0800 Subject: [PATCH 3/8] Updated Kobold Lite to v60 --- static/klite.html | 1153 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 953 insertions(+), 200 deletions(-) diff --git a/static/klite.html b/static/klite.html index 33ba94f0..661e8945 100644 --- a/static/klite.html +++ b/static/klite.html @@ -1,9 +1,9 @@ - + @@ -8148,8 +8712,6 @@ Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp and Kob Share - - @@ -8184,7 +8746,7 @@ Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp and Kob - +
[The hunks of this klite.html diff were garbled in extraction and are not reproduced here. The recoverable fragments show changes to the embedded Kobold Lite UI (distributed under the AGPL v3.0 License for the purposes of koboldcpp), including settings labeled "Improve Prompt (System Message Injection)", "Unban Tokens (KAI)", and "Persist Session", plus a worker option described as: "This option explicitly assigns worker IDs, fixed based on the current workers available at model selection time."]