From e55a9d31c2e067ed42732dafddd6c67b696f3ceb Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Mon, 8 May 2023 22:55:59 +0200
Subject: [PATCH] Update readme, clean up gitmodules file

---
 .gitmodules | 8 --------
 README.md   | 6 ++----
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 4a1fb7c9..0107a8c3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,11 +4,3 @@
 [submodule "KoboldAI-Horde-Bridge"]
 	path = KoboldAI-Horde-Bridge
 	url = https://github.com/db0/KoboldAI-Horde-Bridge
-[submodule "repos/gptq"]
-	path = repos/gptq
-	url = https://github.com/0cc4m/GPTQ-for-LLaMa
-	branch = a8303654c200c25577130466e5f9bc1e70fc8a50
-[submodule "repos/hf_bleeding_edge"]
-	path = repos/hf_bleeding_edge
-	url = https://github.com/0cc4m/hf_bleeding_edge
-	branch = b5d0b80c6947605b9ccf080fc17b68a516ea5857
diff --git a/README.md b/README.md
index aadfd345..517c00e8 100644
--- a/README.md
+++ b/README.md
@@ -42,16 +42,14 @@
 Put your 4bit quantized .pt or .safetensors in that folder with all associated .
 
 Then move your model folder to KoboldAI/models, and rename the .pt or .safetensors file in your model folder to `4bit.pt` or `4bit.safetensors` for non-groupsize models or `4bit-g.pt` or `4bit-.safetensors` for a groupsize mode (Example: `4bit-128g.safetensors`)
 
-So - your .pt's model folder should look like this: "4bit.pt, config.json, generation_config.json, pytorch_model.bin.index.json, special_tokens_map.json, tokenizer.model, tokenizer_config.json" Note: the 4bit.pt file can be in the same folder as the regular HF .bin files it was quantized from, so long as the 4-bit toggle switch is on, it'll load the quantized model (4-bit switch explained below).
+So - your .pt's model folder should look like this: "4bit.pt, config.json, generation_config.json, pytorch_model.bin.index.json, special_tokens_map.json, tokenizer.model, tokenizer_config.json" Note: the 4bit.pt file can be in the same folder as the regular HF .bin files it was quantized from, it'll load the quantized model.
 
 #### Running KoboldAI and loading 4bit models
 
 If you haven't done so already, exit the command prompt/leave KAI's conda env. (Close the commandline window on Windows, run `exit` on Linux)
 
 Run `play.bat` [windows], `play.sh` [linux Nvidia], or `play-rocm.sh` [linux AMD]
 
-Switch to UI2, then load your model and be sure 4-bit toggle is on.
-
-The 4bit toggle shows when a model to load is selected.
+Switch to UI2, then load your model.
 
 ## KoboldAI - Your gateway to GPT writing
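
As a companion to the README instructions touched by this patch, here is a minimal shell sketch of the model-folder setup it describes. The model name `llama-13b-4bit` and the source path are placeholders, not part of the patch; only the `KoboldAI/models` location, the `4bit-128g.safetensors` naming, and `play.sh` come from the README text above.

```sh
# Hypothetical walkthrough of the README steps in the patch above;
# "llama-13b-4bit" and the source path are placeholders.
mkdir -p KoboldAI/models/llama-13b-4bit

# Copy the Huggingface config/tokenizer files that belong to the model.
cp /path/to/quantized-model/config.json \
   /path/to/quantized-model/generation_config.json \
   /path/to/quantized-model/tokenizer.model \
   /path/to/quantized-model/tokenizer_config.json \
   /path/to/quantized-model/special_tokens_map.json \
   KoboldAI/models/llama-13b-4bit/

# Rename the quantized weights: 4bit.safetensors for a non-groupsize model,
# 4bit-128g.safetensors for a 128-groupsize model.
cp /path/to/quantized-model/model.safetensors \
   KoboldAI/models/llama-13b-4bit/4bit-128g.safetensors

# Start KoboldAI (Linux + Nvidia) and load the model from UI2.
./play.sh
```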