Compare commits
175 Commits
Author | SHA1 | Date |
---|---|---|
henk717 | f49d763e2a | |
henk717 | fd24d95981 | |
henk717 | 61a0042c66 | |
henk717 | 8b7ab2f93b | |
henk717 | 0ea758b789 | |
henk717 | 2db1812ee4 | |
anhad | 3287328fe4 | |
anhad | a92951f47e | |
henk717 | 7d39b353c0 | |
henk717 | 58b4c48fdb | |
henk717 | bf61e5ef02 | |
Henk | 386fd1f034 | |
henk717 | d86f61151b | |
henk717 | ebab774aab | |
henk717 | ee93fe6e4a | |
henk717 | 9cb93d6b4c | |
henk717 | d6b1ff513d | |
henk717 | c11a269493 | |
henk717 | 148f900324 | |
henk717 | b66110ea54 | |
henk717 | d2b399d7bc | |
henk717 | f2b643a639 | |
Henk | 1499763472 | |
SmolBleat | 692fe2e5ee | |
henk717 | c3bf89a94f | |
henk717 | 1ae1d499e8 | |
henk717 | b808f039ab | |
henk717 | d88f109073 | |
henk717 | b4cb09590f | |
henk717 | 5f0e2001a7 | |
henk717 | dddde7dbc3 | |
Bogdan Drema | 92a0bf9524 | |
henk717 | e4c15fe1f6 | |
henk717 | b432d55d99 | |
henk717 | ee6e7e9b72 | |
Syler Clayton | 860b697a70 | |
henk717 | 29c2d4b7a6 | |
henk717 | fd12214091 | |
henk717 | bb51127bbf | |
henk717 | 72b4669563 | |
henk717 | ab779efe0e | |
YellowRoseCx | 3c48a77a52 | |
YellowRoseCx | f826930c02 | |
henk717 | 66264d38c4 | |
henk717 | 94eb8ff825 | |
Henk | 219b824b9b | |
henk717 | ffa5c0bc13 | |
henk717 | 487739911a | |
Henk | 2ed6cdb411 | |
henk717 | 142cb354f9 | |
Henk | 93bf023bd7 | |
henk717 | 750cc3d2dc | |
Henk | 0e06fc371f | |
Divided by Zer0 | 6426e3ca24 | |
Divided by Zer0 | 2de9672b95 | |
henk717 | c27faf56e6 | |
henk717 | 5962a6cb4f | |
Henk | 1378fe8beb | |
waffshappen | a0d4497c95 | |
waffshappen | d026bd79cb | |
Henk | cc01ad730a | |
Henk | b58daa1ba1 | |
henk717 | 661bd5c99e | |
Henk | 257a535be5 | |
Henk | 739cccd8ed | |
henk717 | e9cf9fa6d0 | |
henk717 | 031c06347f | |
henk717 | a185cbd015 | |
henk717 | a046db4ded | |
henk717 | 47a27fa906 | |
henk717 | 24f50d6fb7 | |
henk717 | 22acde1ab7 | |
Henk | e9859cf17d | |
Henk | 307fc97b9d | |
henk717 | 4a88e41d14 | |
henk717 | 1628b789d1 | |
Henk | 857476ef6b | |
Henk | 7fc5c46c1d | |
henk717 | 1dbc987048 | |
henk717 | a04f99891f | |
henk717 | 75fecb86cc | |
Gouvernathor | a4f49c097a | |
Gouvernathor | 55cf5f2f67 | |
henk717 | 23b2d3a99e | |
somebody | 9efbe381cf | |
henk717 | 0a926e41e4 | |
vfbd | 33ba3e7e27 | |
Henk | eeb1774d42 | |
Henk | 9a8e8a0005 | |
henk717 | dd7363548c | |
henk717 | 686845cd21 | |
somebody | e6656d68a1 | |
henk717 | 55ef53f39b | |
henk717 | 0b3e22ee13 | |
Henk | d0cb463c53 | |
henk717 | e8245478d6 | |
henk717 | f72ceeadd0 | |
henk717 | 04d9172fcd | |
vfbd | 9a3f0eaab2 | |
henk717 | f2077b8e58 | |
Henk | 2603f1fd5d | |
Henk | 3084552c05 | |
Henk | 13dff68de8 | |
Henk | a66e1443fd | |
Henk | 440c5c333e | |
Henk | f1e4664d56 | |
Henk | eb52ebd082 | |
henk717 | 09b5ffc09d | |
vfbd | b20d80ca2a | |
henk717 | 2e3a80b8ea | |
henk717 | 7b5a766b4a | |
vfbd | 3233e78c56 | |
Henk | 442a9760b8 | |
henk717 | 2300fb46ff | |
Henk | 8ee795055c | |
Henk | ea8b50d31e | |
Henk | 0da404d4f8 | |
Henk | 4699ded3ce | |
henk717 | 351fb3c80b | |
henk717 | 10a779d8c1 | |
vfbd | f7b799be56 | |
ebolam | d588dc0096 | |
ebolam | 73865ba066 | |
henk717 | f8be854e09 | |
henk717 | 2795ced3a4 | |
vfbd | 9ff50d81fd | |
henk717 | c6ed656a76 | |
Llama | e5d0cc7b49 | |
Llama | 6eb3abbdb8 | |
henk717 | fff7837a4a | |
henk717 | be5ffe763c | |
Llama | 8357c3e485 | |
Llama | 05bcd3af11 | |
Llama | 4a01f345de | |
vfbd | bdc73ef393 | |
henk717 | 59e3a40496 | |
Henk | 64715b18d6 | |
Henk | d5143eeb80 | |
henk717 | 739cf0aae7 | |
vfbd | 323f593a96 | |
henk717 | b85d74f22c | |
henk717 | 9f18811ff9 | |
henk717 | 6af0e842f2 | |
vfbd | bdfa6d86b7 | |
vfbd | dd1c25241d | |
vfbd | 1a59a4acea | |
vfbd | 6758d5b538 | |
vfbd | cbab98cc23 | |
vfbd | 51135e192b | |
vfbd | 624f916dc6 | |
vfbd | 07eb2b5c4f | |
vfbd | a51e4f0651 | |
vfbd | bae8d88651 | |
vfbd | aede7ef192 | |
vfbd | 1e9f0e68a0 | |
vfbd | b60d14e3bf | |
vfbd | 09750acfa0 | |
vfbd | b1c456ec18 | |
vfbd | 8da6893407 | |
vfbd | 3d5c83fc23 | |
vfbd | 584056b6d5 | |
vfbd | f79926b73d | |
vfbd | a49a633164 | |
vfbd | 05cf9b1dde | |
vfbd | 728e19a7f0 | |
vfbd | 4e88b277d4 | |
vfbd | 31ea1bafac | |
vfbd | 8823059713 | |
vfbd | 00e8928ee6 | |
vfbd | 9cf1b071b5 | |
vfbd | d1925452f6 | |
vfbd | e469a64a02 | |
vfbd | ee492647ff | |
vfbd | 168c14fd4c | |
vfbd | 289248ef40 |
@@ -48,7 +48,7 @@ If you would like to play KoboldAI online for free on a powerful computer you ca

Each edition features different models and requires different hardware to run; this means that if you are unable to obtain a TPU or a GPU you might still be able to use the other version. The models you can use are listed underneath the edition. To open a Colab, click the big link featuring the edition's name.

## [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)

## [Models the TPU can run:](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)

| Model | Style | Description |
| --- | --- | --- |

@@ -64,22 +64,26 @@ Each edition features different models and requires different hardware to run, t

| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | Generic | Trained by Facebook researchers, this model stems from the MoE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and is considered better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |
| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | Generic | This model serves as the basis for most other 6B models (some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular, it is suitable for a variety of tasks such as writing, Q&A and coding. You will likely get better results with larger generic models or finetuned models. |

## [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)

## [Models the Colab GPU can run:](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)

| Model | Style | Description |
| --- | --- | --- |
| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (a newer Janeway); on top of the Pike dataset you also get some light novels, Adventure mode support and a little bit of Shinen thrown into the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model too. Adventure mode is best played from a second-person perspective, but can be played in first or third person as well. Novel writing works best in the first or third person. |
| [Erebus](https://huggingface.co/KoboldAI/OPT-2.7B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model. A combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys, and featuring thorough tagging support, it covers the vast majority of erotic writing styles. This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non-research usage we recommend choosing the 20B version, as that one is not subject to the restrictive OPT license. |
| [Tiefighter 13B by KoboldAI](https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter) | Hybrid | Tiefighter 13B is a very versatile fiction hybrid: it can write, chat and play adventure games, and can also answer regular instructions (although we do not recommend this model for factual use due to its fictional nature). This is an excellent starting model; for the best results avoid using second-person writing in your chats unless you want it to become a text adventure. |
| [Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focused on SFW, romantic scenes might involve a degree of nudity. |
| [Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | Novel | Picard is a model trained for SFW novels based on Neo 2.7B. It is focused on novel-style writing without the NSFW bias. While the name suggests a sci-fi model, this model is designed for novels of a variety of genres. It is meant to be used in KoboldAI's regular mode. |
| [AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | Adventure | Also known as Adventure 2.7B, this is a clone of the AI Dungeon Classic model and is best known for the epic wacky adventures that AI Dungeon Classic players love. |
| [Horni LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | Novel | This model is based on Horni 2.7B and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between an SFW novel model and an NSFW model, this model should be a good choice. |
| [Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | NSFW | This model is tuned on Literotica to produce a novel-style model biased towards NSFW content. It can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |
| [Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is too tame for you, Shinen might produce better results. While it is a novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |
| [OPT](https://huggingface.co/facebook/opt-2.7b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes; its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo, duplicate and unnecessary content has been left out, while additional literature was added in, similar to the Fairseq Dense model. The Fairseq Dense model, however, lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage or usage beyond research purposes. |
| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-2.7B) | Generic | Trained by Facebook researchers, this model stems from the MoE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger models from EleutherAI and is considered better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |
| [MythoMax 13B](https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ) by Gryphe | Roleplay | An improved, potentially even perfected variant of MythoMix, Gryphe's MythoLogic-L2 and Huginn merge using a highly experimental tensor-type merge technique. |
| [Holomax 13B by KoboldAI](https://huggingface.co/KoboldAI/LLaMA2-13B-Holomax) | Adventure | This is an expansion merge to the well-praised MythoMax model from Gryphe (60%) using MrSeeker's KoboldAI Holodeck model (40%). The goal of this model is to enhance story-writing capabilities while preserving the desirable traits of the MythoMax model as much as possible (it does limit chat reply length). |
| [Airoboros 13B](https://huggingface.co/jondurbin/airoboros-13b) by Jon Durbin | Generic | This is an instruction fine-tuned llama-2 model, using synthetic instructions generated by airoboros. |
| [Emerhyst 13B](https://huggingface.co/Undi95/Emerhyst-13B) by Undi | Roleplay | An attempt using BlockMerge_Gradient to get better results. In addition, LimaRP v3 was used. |
| [Chronos 13B](https://huggingface.co/elinas/chronos-13b) by Elinas | Generic | This model is primarily focused on chat, roleplay and storywriting, but can accomplish other tasks such as simple reasoning and coding. Chronos generates very long outputs with coherent text, largely due to the human inputs it was trained on. |
| [Spring Dragon by Henk717](https://huggingface.co/Henk717/spring-dragon) | Adventure | This model is a recreation attempt of the AI Dungeon 2 Dragon model. To achieve this, the "text_adventures.txt" dataset was used, which was bundled with the original AI Dungeon 2 GitHub release prior to the online service. It is worth noting that the same dataset file was used to create the Dragon model, where Dragon is a GPT-3 175B Davinci model from 2020. |
| [Holodeck by KoboldAI](https://huggingface.co/KoboldAI/LLAMA2-13B-Holodeck-1) | Adventure | LLAMA2 13B-Holodeck is a finetune created using Meta's llama 2 model. The training data contains around 3000 ebooks in various genres. Most parts of the dataset have been prepended using the following text: [Genre: <genre1>, <genre2>] |
| [Neo](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | Generic | This is the base model for all the other 2.7B models; it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |
| Various 2.7B models by various | Various | Various smaller models can also be loaded in the GPU Colab. |

### Styles

| Type | Description |

@@ -105,7 +109,7 @@ KoboldAI has a large number of dependencies you will need to install on your com

### Downloading the latest version of KoboldAI

KoboldAI is a rolling release on our github, the code you see is also the game. You can the software by clicking on the green Code button at the top of the page and clicking Download ZIP.

KoboldAI is a rolling release on our GitHub; the code you see is also the game. You can download the software by clicking the green Code button at the top of the page and clicking Download ZIP, or use the `git clone` command instead. Then, on Windows, run install_requirements.bat (using admin mode is recommended to avoid errors); once it's done, or if you're on Linux, run either play.bat/sh or remote-play.bat/sh.

The easiest way for Windows users is to use the [offline installer](https://sourceforge.net/projects/koboldai/files/latest/download) below.

@@ -228,4 +232,4 @@ Did we miss your contribution? Feel free to issue a commit adding your name to t

KoboldAI is licensed under the AGPL license; in short, this means it can be used by anyone for any purpose. However, if you decide to make a publicly available instance, your users are entitled to a copy of the source code, including all modifications you have made, which needs to be available through an interface such as a button on your website. You may also not distribute this project in a form that does not contain the source code, such as compiling or encrypting the code and distributing that version without also distributing the source code that includes your changes. You are allowed to distribute it in a closed form if you also provide a separate archive with the source code.

umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods; it is not part of our project and does not fall under the AGPL license. It is licensed under the BSD-3-Clause license. Other files with differing licenses will have a reference or embedded version of this license within the file. It has been sourced from https://anaconda.org/conda-forge/micromamba/files and its source code can be found here: https://github.com/mamba-org/mamba/tree/master/micromamba
aiserver.py
@@ -1,7 +1,7 @@
#!/usr/bin/python3
#==================================================================#
# KoboldAI
# Version: 1.19.0
# Version: 1.19.2
# By: The KoboldAI Community
#==================================================================#

@@ -125,6 +125,7 @@ model_menu = {
["NSFW Models", "nsfwlist", "", True],
["Untuned OPT", "optlist", "", True],
["Untuned GPT-Neo/J", "gptneolist", "", True],
["Untuned Pythia", "pythialist", "", True],
["Untuned Fairseq Dense", "fsdlist", "", True],
["Untuned Bloom", "bloomlist", "", True],
["Untuned XGLM", "xglmlist", "", True],

@@ -154,6 +155,7 @@ model_menu = {
["OPT Nerys 6B V2 (Hybrid)", "KoboldAI/OPT-6B-nerys-v2", "16GB", False],
["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB", False],
["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB", False],
["Qilin Lit 6B (SFW)", "rexwang8/qilin-lit-6b", "16GB", False],
["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB", False],
["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB", False],
["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB", False],

@@ -163,13 +165,16 @@ model_menu = {
],
'nsfwlist': [
["Erebus 20B (NSFW)", "KoboldAI/GPT-NeoX-20B-Erebus", "64GB", False],
["Nerybus 13B (NSFW)", "KoboldAI/OPT-13B-Nerybus-Mix", "32GB", False],
["Erebus 13B (NSFW)", "KoboldAI/OPT-13B-Erebus", "32GB", False],
["Shinen FSD 13B (NSFW)", "KoboldAI/fairseq-dense-13B-Shinen", "32GB", False],
["Nerybus 6.7B (NSFW)", "KoboldAI/OPT-6.7B-Nerybus-Mix", "16GB", False],
["Erebus 6.7B (NSFW)", "KoboldAI/OPT-6.7B-Erebus", "16GB", False],
["Shinen FSD 6.7B (NSFW)", "KoboldAI/fairseq-dense-6.7B-Shinen", "16GB", False],
["Lit V2 6B (NSFW)", "hakurei/litv2-6B-rev3", "16GB", False],
["Lit 6B (NSFW)", "hakurei/lit-6B", "16GB", False],
["Shinen 6B (NSFW)", "KoboldAI/GPT-J-6B-Shinen", "16GB", False],
["Nerybus 2.7B (NSFW)", "KoboldAI/OPT-2.7B-Nerybus-Mix", "8GB", False],
["Erebus 2.7B (NSFW)", "KoboldAI/OPT-2.7B-Erebus", "8GB", False],
["Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "8GB", False],
["Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", "8GB", False],

@@ -183,12 +188,31 @@ model_menu = {
],
'gptneolist': [
["GPT-NeoX 20B", "EleutherAI/gpt-neox-20b", "64GB", False],
["Pythia 13B (NeoX, Same dataset)", "EleutherAI/pythia-13b", "32GB", False],
["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB", False],
["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB", False],
["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB", False],
["Pythia 800M (NeoX, Same dataset)", "EleutherAI/pythia-800m", "4GB", False],
["Pythia 350M (NeoX, Same dataset)", "EleutherAI/pythia-350m", "2GB", False],
["GPT-Neo 125M", "EleutherAI/gpt-neo-125M", "2GB", False],
["Return to Main Menu", "mainmenu", "", True],
],
'pythialist': [
["Pythia 13B Deduped", "EleutherAI/pythia-13b-deduped", "32GB", False],
["Pythia 13B", "EleutherAI/pythia-13b", "32GB", False],
["Pythia 6.7B Deduped", "EleutherAI/pythia-6.7b-deduped", "16GB", False],
["Pythia 6.7B", "EleutherAI/pythia-6.7b", "16GB", False],
["Pythia 1.3B Deduped", "EleutherAI/pythia-1.3b-deduped", "6GB", False],
["Pythia 1.3B", "EleutherAI/pythia-1.3b", "6GB", False],
["Pythia 800M", "EleutherAI/pythia-800m", "4GB", False],
["Pythia 350M Deduped", "EleutherAI/pythia-350m-deduped", "2GB", False],
["Pythia 350M", "EleutherAI/pythia-350m", "2GB", False],
["Pythia 125M Deduped", "EleutherAI/pythia-125m-deduped", "2GB", False],
["Pythia 125M", "EleutherAI/pythia-125m", "2GB", False],
["Pythia 19M Deduped", "EleutherAI/pythia-19m-deduped", "1GB", False],
["Pythia 19M", "EleutherAI/pythia-19m", "1GB", False],
["Return to Main Menu", "mainmenu", "", True],
],
'gpt2list': [
["GPT-2 XL", "gpt2-xl", "6GB", False],
["GPT-2 Large", "gpt2-large", "4GB", False],
@@ -377,6 +401,7 @@ class vars:
comregex_ai = re.compile(r'(?:\n<\|(?:.|\n)*?\|>(?=\n|$))|(?:<\|(?:.|\n)*?\|>\n?)') # Pattern for matching comments to remove them before sending them to the AI
comregex_ui = re.compile(r'(<\|(?:.|\n)*?\|>)') # Pattern for matching comments in the editor
sampler_order = utils.default_sampler_order.copy()
rng_states = {} # Used by the POST /generate endpoint to store sampler RNG states
chatmode = False
chatname = "You"
adventure = False
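As context for the comment regexes shown above, here is a minimal standalone sketch of what comregex_ai strips before text reaches the model (the sample string is made up):

```python
import re

# Same pattern as comregex_ai above: removes <|...|> comment blocks
# before the text is sent to the AI.
comregex_ai = re.compile(r'(?:\n<\|(?:.|\n)*?\|>(?=\n|$))|(?:<\|(?:.|\n)*?\|>\n?)')
print(comregex_ai.sub("", "<|author note to self|>\nOnce upon a time"))  # -> "Once upon a time"
```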
@@ -451,6 +476,7 @@ def emit(*args, **kwargs):
return _emit(*args, **kwargs)
except AttributeError:
return socketio.emit(*args, **kwargs)
utils.emit = emit

# marshmallow/apispec setup
from apispec import APISpec

@@ -630,7 +656,7 @@ tags = [
api_version = None # This gets set automatically so don't change this value

api_v1 = KoboldAPISpec(
version="1.1.4",
version="1.2.1",
prefixes=["/api/v1", "/api/latest"],
tags=tags,
)

@@ -755,6 +781,12 @@ def getmodelname():
modelname = vars.model
return modelname

#==================================================================#
# Get hidden size from model
#==================================================================#
def get_hidden_size_from_model(model):
return model.get_input_embeddings().embedding_dim

#==================================================================#
# Breakmodel configuration functions
#==================================================================#

@@ -872,7 +904,7 @@ def device_config(config):
print(f"{colors.RED}Please enter an integer between -1 and {n_layers}.{colors.END}")

logger.init_ok("Final device configuration:", status="Info")
device_list(n_layers)
device_list(n_layers, primary=breakmodel.primary_device)

# If all layers are on the same device, use the old GPU generation mode
while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0):
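The while loop above trims trailing zero-sized GPU blocks so that a layer split which effectively lives on one device can fall back to the old single-GPU generation mode. A minimal standalone sketch (the block values are made up):

```python
# Sketch of the trailing-zero trim above; gpu_blocks holds the number of
# layers assigned to each GPU.
gpu_blocks = [28, 0, 0]
while len(gpu_blocks) and gpu_blocks[-1] == 0:
    gpu_blocks.pop()
print(gpu_blocks)  # [28] -> everything on one device
```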
@@ -988,7 +1020,7 @@ def loadmodelsettings():
if("nobreakmodel" in js):
vars.nobreakmodel = js["nobreakmodel"]
if("sampler_order" in js):
sampler_order = vars.sampler_order
sampler_order = js["sampler_order"]
if(len(sampler_order) < 7):
sampler_order = [6] + sampler_order
vars.sampler_order = sampler_order
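Both loadmodelsettings above and processsettings in the next hunk migrate old settings the same way: a saved sampler_order with fewer than seven entries gets sampler 6 prepended so the order stays valid. A standalone sketch of that migration:

```python
# Sketch of the sampler_order migration above: settings saved before the
# seventh sampler existed get sampler 6 prepended.
def migrate_sampler_order(sampler_order):
    if len(sampler_order) < 7:
        sampler_order = [6] + sampler_order
    return sampler_order

print(migrate_sampler_order([0, 1, 2, 3, 4, 5]))  # -> [6, 0, 1, 2, 3, 4, 5]
```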
@@ -1126,7 +1158,7 @@ def processsettings(js):
if("andepth" in js):
vars.andepth = js["andepth"]
if("sampler_order" in js):
sampler_order = vars.sampler_order
sampler_order = js["sampler_order"]
if(len(sampler_order) < 7):
sampler_order = [6] + sampler_order
vars.sampler_order = sampler_order

@@ -1353,6 +1385,8 @@ def general_startup(override_args=None):
args = parser.parse_args(shlex.split(os.environ["KOBOLDAI_ARGS"]))
else:
args = parser.parse_args()

utils.args = args

set_logger_verbosity(args.verbosity)
quiesce_logger(args.quiesce)

@@ -1480,7 +1514,7 @@ def get_model_info(model, directory=""):
models_on_url = True
url = True
key = True
default_url = 'https://koboldai.net'
default_url = 'https://horde.koboldai.net'
multi_online_models = True
if path.exists(get_config_filename(model)):
with open(get_config_filename(model), "r") as file:

@@ -1559,13 +1593,13 @@ def get_layer_count(model, directory=""):
model = directory
from transformers import AutoConfig
if(os.path.isdir(model.replace('/', '_'))):
model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=vars.revision, cache_dir="cache")
model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=args.revision, cache_dir="cache")
elif(os.path.isdir("models/{}".format(model.replace('/', '_')))):
model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=args.revision, cache_dir="cache")
elif(os.path.isdir(directory)):
model_config = AutoConfig.from_pretrained(directory, revision=vars.revision, cache_dir="cache")
model_config = AutoConfig.from_pretrained(directory, revision=args.revision, cache_dir="cache")
else:
model_config = AutoConfig.from_pretrained(model, revision=vars.revision, cache_dir="cache")
model_config = AutoConfig.from_pretrained(model, revision=args.revision, cache_dir="cache")
try:
if ((utils.HAS_ACCELERATE and model_config.model_type != 'gpt2') or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not vars.nobreakmodel:
return utils.num_layers(model_config)

@@ -1640,7 +1674,7 @@ def get_cluster_models(msg):
# Get list of models from public cluster
logger.init("KAI Horde Models", status="Retrieving")
try:
req = requests.get("{}/api/v1/models".format(url))
req = requests.get(f"{url}/api/v2/status/models?type=text")
except requests.exceptions.ConnectionError:
logger.init_err("KAI Horde Models", status="Failed")
logger.error("Provided KoboldAI Horde URL unreachable")

@@ -1656,10 +1690,11 @@ def get_cluster_models(msg):
engines = req.json()
logger.debug(engines)
try:
engines = [[en, en] for en in engines]
engines = [[en["name"], en["name"]] for en in engines]
except:
logger.error(engines)
raise
logger.debug(engines)

online_model = ""
changed=False

@@ -1789,7 +1824,9 @@ def patch_transformers():
if not args.no_aria2:
utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
PreTrainedModel.from_pretrained = new_from_pretrained
if(not hasattr(PreTrainedModel, "_kai_patched")):
PreTrainedModel.from_pretrained = new_from_pretrained
PreTrainedModel._kai_patched = True
if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
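The `_kai_patched` guard introduced above makes the from_pretrained monkey-patch idempotent, so loading a second model does not wrap the method twice. A minimal sketch of that pattern with stand-in names (FakePreTrainedModel is illustrative, not KoboldAI's actual class):

```python
class FakePreTrainedModel:
    @classmethod
    def from_pretrained(cls, name, *args, **kwargs):
        return f"loaded {name}"

def patch_once():
    if hasattr(FakePreTrainedModel, "_kai_patched"):
        return  # already wrapped; do nothing
    old = FakePreTrainedModel.from_pretrained
    @classmethod
    def new_from_pretrained(cls, name, *args, **kwargs):
        # hook work (e.g. the aria2 download hook above) would run here
        return old(name, *args, **kwargs)
    FakePreTrainedModel.from_pretrained = new_from_pretrained
    FakePreTrainedModel._kai_patched = True  # guard: never wrap twice

patch_once()
patch_once()  # harmless: the guard makes the second call a no-op
print(FakePreTrainedModel.from_pretrained("gpt2"))
```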
@@ -1903,34 +1940,26 @@ def patch_transformers():

from torch.nn import functional as F

class ProbabilityVisualizerLogitsProcessor(LogitsProcessor):
def __init__(self):
pass
def visualize_probabilities(scores: torch.FloatTensor) -> None:
assert scores.ndim == 2

def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
assert scores.ndim == 2
assert input_ids.ndim == 2
if vars.numseqs > 1 or not vars.show_probs:
return

if vars.numseqs > 1 or not vars.show_probs:
return scores
probs = F.softmax(scores, dim = -1).cpu().numpy()[0]
token_prob_info = []
for token_id, score in sorted(enumerate(probs), key=lambda x: x[1], reverse=True)[:8]:
token_prob_info.append({
"tokenId": token_id,
"decoded": utils.decodenewlines(tokenizer.decode(token_id)),
"score": float(score),
})

probs = F.softmax(scores, dim = -1).cpu().numpy()[0]

token_prob_info = []
for token_id, score in sorted(enumerate(probs), key=lambda x: x[1], reverse=True)[:8]:
token_prob_info.append({
"tokenId": token_id,
"decoded": utils.decodenewlines(tokenizer.decode(token_id)),
"score": float(score),
})

vars.token_stream_queue.probability_buffer = token_prob_info
return scores
vars.token_stream_queue.probability_buffer = token_prob_info

def new_get_logits_processor(*args, **kwargs) -> LogitsProcessorList:
processors = new_get_logits_processor.old_get_logits_processor(*args, **kwargs)
processors.insert(0, LuaLogitsProcessor())
processors.append(ProbabilityVisualizerLogitsProcessor())
return processors
new_get_logits_processor.old_get_logits_processor = transformers.generation_utils.GenerationMixin._get_logits_processor
transformers.generation_utils.GenerationMixin._get_logits_processor = new_get_logits_processor

@@ -1952,6 +1981,7 @@ def patch_transformers():
sampler_order = [6] + sampler_order
for k in sampler_order:
scores = self.__warper_list[k](input_ids, scores, *args, **kwargs)
visualize_probabilities(scores)
return scores

def new_get_logits_warper(beams: int = 1,) -> LogitsProcessorList:
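The refactor above moves probability capture out of a LogitsProcessor and into a visualize_probabilities helper that the warper loop calls after all samplers have run. A standalone sketch of the same top-8 extraction (a minimal example assuming torch is installed; the logits are made up):

```python
import torch
import torch.nn.functional as F

def top_token_probs(scores: torch.Tensor, k: int = 8):
    assert scores.ndim == 2  # (batch, vocab), same contract as above
    probs = F.softmax(scores, dim=-1)[0]
    top = sorted(enumerate(probs.tolist()), key=lambda x: x[1], reverse=True)[:k]
    return [{"tokenId": tid, "score": float(p)} for tid, p in top]

print(top_token_probs(torch.tensor([[2.0, 1.0, 0.1, -1.0]]), k=2))
```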
|
||||
|
@ -2205,19 +2235,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
from transformers import AutoConfig
|
||||
if(os.path.isdir(vars.custmodpth.replace('/', '_'))):
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), revision=vars.revision, cache_dir="cache")
|
||||
model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), revision=args.revision, cache_dir="cache")
|
||||
vars.model_type = model_config.model_type
|
||||
except ValueError as e:
|
||||
vars.model_type = "not_found"
|
||||
elif(os.path.isdir("models/{}".format(vars.custmodpth.replace('/', '_')))):
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained("models/{}".format(vars.custmodpth.replace('/', '_')), revision=vars.revision, cache_dir="cache")
|
||||
model_config = AutoConfig.from_pretrained("models/{}".format(vars.custmodpth.replace('/', '_')), revision=args.revision, cache_dir="cache")
|
||||
vars.model_type = model_config.model_type
|
||||
except ValueError as e:
|
||||
vars.model_type = "not_found"
|
||||
else:
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
model_config = AutoConfig.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache")
|
||||
vars.model_type = model_config.model_type
|
||||
except ValueError as e:
|
||||
vars.model_type = "not_found"
|
||||
|
@ -2354,6 +2384,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
with zipfile.ZipFile(f, "r") as z:
|
||||
try:
|
||||
last_storage_key = None
|
||||
zipfolder = os.path.basename(os.path.normpath(f)).split('.')[0]
|
||||
f = None
|
||||
current_offset = 0
|
||||
able_to_pin_layers = True
|
||||
|
@ -2365,7 +2396,10 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
last_storage_key = storage_key
|
||||
if isinstance(f, zipfile.ZipExtFile):
|
||||
f.close()
|
||||
f = z.open(f"archive/data/{storage_key}")
|
||||
try:
|
||||
f = z.open(f"archive/data/{storage_key}")
|
||||
except:
|
||||
f = z.open(f"{zipfolder}/data/{storage_key}")
|
||||
current_offset = 0
|
||||
if current_offset != model_dict[key].seek_offset:
|
||||
f.read(model_dict[key].seek_offset - current_offset)
|
||||
|
@ -2401,6 +2435,15 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
|
||||
if utils.offload_index:
|
||||
for name, tensor in utils.named_buffers:
|
||||
dtype = tensor.dtype
|
||||
if convert_to_float16 and breakmodel.primary_device != "cpu" and vars.hascuda and (vars.breakmodel or vars.usegpu):
|
||||
dtype = torch.float16
|
||||
if breakmodel.primary_device == "cpu" or (not vars.usegpu and not vars.breakmodel):
|
||||
dtype = torch.float32
|
||||
if name in model_dict and model_dict[name].dtype is not dtype:
|
||||
model_dict[name] = model_dict[name].to(dtype)
|
||||
if tensor.dtype is not dtype:
|
||||
tensor = tensor.to(dtype)
|
||||
if name not in utils.offload_index:
|
||||
accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index)
|
||||
accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache")
|
||||
|
@ -2414,9 +2457,6 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
return lazy_load_callback
|
||||
|
||||
|
||||
def get_hidden_size_from_model(model):
|
||||
return model.get_input_embeddings().embedding_dim
|
||||
|
||||
def maybe_low_cpu_mem_usage() -> Dict[str, Any]:
|
||||
if(packaging.version.parse(transformers_version) < packaging.version.parse("4.11.0")):
|
||||
logger.warning(f"Please upgrade to transformers 4.11.0 for lower RAM usage. You have transformers {transformers_version}.")
|
||||
|
@ -2446,19 +2486,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
with(maybe_use_float16()):
|
||||
try:
|
||||
if os.path.exists(vars.custmodpth):
|
||||
model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache")
|
||||
elif os.path.exists(os.path.join("models/", vars.custmodpth)):
|
||||
model = GPT2LMHeadModel.from_pretrained(os.path.join("models/", vars.custmodpth), revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(os.path.join("models/", vars.custmodpth), revision=vars.revision, cache_dir="cache")
|
||||
model = GPT2LMHeadModel.from_pretrained(os.path.join("models/", vars.custmodpth), revision=args.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(os.path.join("models/", vars.custmodpth), revision=args.revision, cache_dir="cache")
|
||||
else:
|
||||
model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache")
|
||||
except Exception as e:
|
||||
if("out of memory" in traceback.format_exc().lower()):
|
||||
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
|
||||
raise e
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache")
|
||||
model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
|
||||
tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_')))
|
||||
vars.modeldim = get_hidden_size_from_model(model)
|
||||
|
@ -2505,38 +2545,38 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
lowmem = {}
|
||||
if(os.path.isdir(vars.custmodpth)):
|
||||
try:
|
||||
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
|
||||
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache", use_fast=False)
|
||||
except Exception as e:
|
||||
try:
|
||||
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache")
|
||||
except Exception as e:
|
||||
try:
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache")
|
||||
except Exception as e:
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
|
||||
try:
|
||||
model = AutoModelForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", **lowmem)
|
||||
model = AutoModelForCausalLM.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache", **lowmem)
|
||||
except Exception as e:
|
||||
if("out of memory" in traceback.format_exc().lower()):
|
||||
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
|
||||
model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", **lowmem)
|
||||
model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, revision=args.revision, cache_dir="cache", **lowmem)
|
||||
elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
|
||||
try:
|
||||
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
|
||||
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", use_fast=False)
|
||||
except Exception as e:
|
||||
try:
|
||||
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache")
|
||||
except Exception as e:
|
||||
try:
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache")
|
||||
except Exception as e:
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
|
||||
try:
|
||||
model = AutoModelForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem)
|
||||
model = AutoModelForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", **lowmem)
|
||||
except Exception as e:
|
||||
if("out of memory" in traceback.format_exc().lower()):
|
||||
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
|
||||
model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem)
|
||||
model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", **lowmem)
|
||||
else:
|
||||
old_rebuild_tensor = torch._utils._rebuild_tensor
|
||||
def new_rebuild_tensor(storage: Union[torch_lazy_loader.LazyTensor, torch.Storage], storage_offset, shape, stride):
|
||||
|
@ -2552,28 +2592,28 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
torch._utils._rebuild_tensor = new_rebuild_tensor
|
||||
|
||||
try:
|
||||
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
|
||||
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=args.revision, cache_dir="cache", use_fast=False)
|
||||
except Exception as e:
|
||||
try:
|
||||
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=args.revision, cache_dir="cache")
|
||||
except Exception as e:
|
||||
try:
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(vars.model, revision=args.revision, cache_dir="cache")
|
||||
except Exception as e:
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
|
||||
try:
|
||||
model = AutoModelForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem)
|
||||
model = AutoModelForCausalLM.from_pretrained(vars.model, revision=args.revision, cache_dir="cache", **lowmem)
|
||||
except Exception as e:
|
||||
if("out of memory" in traceback.format_exc().lower()):
|
||||
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
|
||||
model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem)
|
||||
model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=args.revision, cache_dir="cache", **lowmem)
|
||||
|
||||
torch._utils._rebuild_tensor = old_rebuild_tensor
|
||||
|
||||
if not args.colab or args.savemodel:
|
||||
import shutil
|
||||
tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_')))
|
||||
if(vars.fp32_model): # Use save_pretrained to convert fp32 models to fp16
|
||||
if(vars.fp32_model and ("breakmodel" not in globals() or not breakmodel.disk_blocks)): # Use save_pretrained to convert fp32 models to fp16, unless we are using disk cache because save_pretrained is not supported in that case
|
||||
model = model.half()
|
||||
model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
|
||||
else: # For fp16 models, we can just copy the model files directly
|
||||
|
@ -2583,10 +2623,10 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
import huggingface_hub
|
||||
legacy = packaging.version.parse(transformers_version) < packaging.version.parse("4.22.0.dev0")
|
||||
# Save the config.json
|
||||
shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(vars.model, transformers.configuration_utils.CONFIG_NAME, revision=vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
|
||||
shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(vars.model, transformers.configuration_utils.CONFIG_NAME, revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
|
||||
if(utils.num_shards is None):
|
||||
# Save the pytorch_model.bin of an unsharded model
|
||||
shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
|
||||
shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
|
||||
else:
|
||||
with open(utils.from_pretrained_index_filename) as f:
|
||||
map_data = json.load(f)
|
||||
|
@ -2595,7 +2635,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
shutil.move(os.path.realpath(utils.from_pretrained_index_filename), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME))
|
||||
# Then save the pytorch_model-#####-of-#####.bin files
|
||||
for filename in filenames:
|
||||
shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(vars.model, filename, revision=vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(vars.model.replace('/', '_')), filename))
|
||||
shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(vars.model, filename, revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(vars.model.replace('/', '_')), filename))
|
||||
shutil.rmtree("cache/")
|
||||
|
||||
if(vars.badwordsids is vars.badwordsids_default and vars.model_type not in ("gpt2", "gpt_neo", "gptj")):
|
||||
|
@ -2641,7 +2681,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
|
||||
else:
|
||||
from transformers import GPT2Tokenizer
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
|
||||
else:
|
||||
from transformers import PreTrainedModel
|
||||
from transformers import modeling_utils
|
||||
|
@ -2658,7 +2698,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
if not args.no_aria2:
|
||||
utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
|
||||
return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
|
||||
PreTrainedModel.from_pretrained = new_from_pretrained
|
||||
if(not hasattr(PreTrainedModel, "_kai_patched")):
|
||||
PreTrainedModel.from_pretrained = new_from_pretrained
|
||||
PreTrainedModel._kai_patched = True
|
||||
if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
|
||||
old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
|
||||
def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
|
||||
|
@ -2738,11 +2780,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||
# If we're running Colab or OAI, we still need a tokenizer.
|
||||
if(vars.model in ("Colab", "API", "CLUSTER")):
|
||||
from transformers import GPT2Tokenizer
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=args.revision, cache_dir="cache")
|
||||
loadsettings()
|
||||
elif(vars.model == "OAI"):
|
||||
from transformers import GPT2Tokenizer
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
|
||||
loadsettings()
|
||||
# Load the TPU backend if requested
|
||||
elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
|
||||
|
@ -2904,7 +2946,7 @@ def lua_startup():
|
|||
except lupa.LuaError as e:
|
||||
print(colors.RED + "ERROR!" + colors.END)
|
||||
vars.lua_koboldbridge.obliterate_multiverse()
|
||||
logger.debug('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.error('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.")
|
||||
exit(1)
|
||||
logger.init_ok("LUA bridge", status="OK")
|
||||
|
@ -2963,7 +3005,7 @@ def load_lua_scripts():
|
|||
if(vars.serverstarted):
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True)
|
||||
sendUSStatItems()
|
||||
logger.debug('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.error('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.")
|
||||
if(vars.serverstarted):
|
||||
set_aibusy(0)
|
||||
|
@ -2999,7 +3041,7 @@ def lua_decode(tokens):
|
|||
if("tokenizer" not in globals()):
|
||||
from transformers import GPT2Tokenizer
|
||||
global tokenizer
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
|
||||
return utils.decodenewlines(tokenizer.decode(tokens))
|
||||
|
||||
#==================================================================#
|
||||
|
@ -3011,7 +3053,7 @@ def lua_encode(string):
|
|||
if("tokenizer" not in globals()):
|
||||
from transformers import GPT2Tokenizer
|
||||
global tokenizer
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
|
||||
return tokenizer.encode(utils.encodenewlines(string), max_length=int(4e9), truncation=True)
|
||||
|
||||
#==================================================================#
|
||||
|
@ -3045,6 +3087,8 @@ def lua_compute_context(submission, entries, folders, kwargs):
|
|||
force_use_txt=True,
|
||||
scan_story=kwargs["scan_story"] if kwargs["scan_story"] != None else True,
|
||||
)
|
||||
if kwargs["include_anote"] is not None and not kwargs["include_anote"]:
|
||||
anotetxt = ""
|
||||
txt, _, _ = calcsubmitbudget(
|
||||
len(actions),
|
||||
winfo,
|
||||
|
@ -3460,7 +3504,7 @@ def execute_inmod():
|
|||
vars.lua_running = False
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True)
|
||||
sendUSStatItems()
|
||||
logger.debug('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.error('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.")
|
||||
set_aibusy(0)
|
||||
|
||||
|
@ -3477,7 +3521,7 @@ def execute_outmod():
|
|||
vars.lua_running = False
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True)
|
||||
sendUSStatItems()
|
||||
logger.debug('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.error('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.")
|
||||
set_aibusy(0)
|
||||
if(vars.lua_koboldbridge.resend_settings_required):
|
||||
|
@ -4158,19 +4202,19 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
|
|||
try:
|
||||
if(os.path.isdir(tokenizer_id)):
|
||||
try:
|
||||
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
|
||||
except:
|
||||
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=vars.revision, cache_dir="cache", use_fast=False)
|
||||
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
|
||||
elif(os.path.isdir("models/{}".format(tokenizer_id.replace('/', '_')))):
|
||||
try:
|
||||
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache")
|
||||
except:
|
||||
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
|
||||
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache", use_fast=False)
|
||||
else:
|
||||
try:
|
||||
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
|
||||
except:
|
||||
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=vars.revision, cache_dir="cache", use_fast=False)
|
||||
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
|
||||
except:
|
||||
logger.warning(f"Unknown tokenizer {repr(tokenizer_id)}")
|
||||
vars.api_tokenizer_id = tokenizer_id
|
||||
|
@ -4582,7 +4626,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
|
|||
if("tokenizer" not in globals()):
|
||||
from transformers import GPT2Tokenizer
|
||||
global tokenizer
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
|
||||
|
||||
lnheader = len(tokenizer._koboldai_header)
|
||||
|
||||
|
@ -4897,7 +4941,7 @@ def generate(txt, minimum, maximum, found_entries=None):
|
|||
vars.lua_running = False
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True)
|
||||
sendUSStatItems()
|
||||
logger.debug('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.error('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.")
|
||||
else:
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': 'Error occurred during generator call; please check console.'}, broadcast=True)
|
||||
|
@ -5229,15 +5273,21 @@ def sendtocluster(txt, min, max):
|
|||
cluster_metadata = {
|
||||
'prompt': txt,
|
||||
'params': reqdata,
|
||||
'api_key': vars.apikey,
|
||||
'models': vars.cluster_requested_models,
|
||||
}
|
||||
'trusted_workers': False,
|
||||
}
|
||||
client_agent = "KoboldAI:1.19.3:koboldai.org"
|
||||
cluster_headers = {
|
||||
'apikey': vars.apikey,
|
||||
"Client-Agent": client_agent
|
||||
}
|
||||
logger.debug(f"Horde Payload: {cluster_metadata}")
|
||||
try:
|
||||
# Create request
|
||||
req = requests.post(
|
||||
vars.colaburl[:-8] + "/api/v1/generate/sync",
|
||||
vars.colaburl[:-8] + "/api/v2/generate/text/async",
|
||||
json=cluster_metadata,
|
||||
headers=cluster_headers,
|
||||
)
|
||||
except requests.exceptions.ConnectionError:
|
||||
errmsg = f"Horde unavailable. Please try again later"
|
||||
|
@ -5265,13 +5315,76 @@ def sendtocluster(txt, min, max):
|
|||
emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
|
||||
set_aibusy(0)
|
||||
return
|
||||
gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
|
||||
logger.info(f"Generations by: {gen_servers}")
|
||||
|
||||
request_id = js["id"]
|
||||
logger.debug("Horde Request ID: {}".format(request_id))
|
||||
|
||||
cluster_agent_headers = {
|
||||
"Client-Agent": client_agent
|
||||
}
|
||||
finished = False
|
||||
|
||||
while not finished:
|
||||
try:
|
||||
req = requests.get(vars.colaburl[:-8] + "/api/v2/generate/text/status/" + request_id, headers=cluster_agent_headers)
|
||||
except requests.exceptions.ConnectionError:
|
||||
errmsg = f"Horde unavailable. Please try again later"
|
||||
logger.error(errmsg)
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
|
||||
set_aibusy(0)
|
||||
return
|
||||
|
||||
if not req.ok:
|
||||
errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
|
||||
logger.error(req.text)
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
|
||||
set_aibusy(0)
|
||||
return
|
||||
|
||||
try:
|
||||
req_status = req.json()
|
||||
except requests.exceptions.JSONDecodeError:
|
||||
errmsg = f"Unexpected message received from the KoboldAI Horde: '{req.text}'"
|
||||
logger.error(errmsg)
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
|
||||
set_aibusy(0)
|
||||
return
|
||||
|
||||
if "done" not in req_status:
|
||||
errmsg = f"Unexpected response received from the KoboldAI Horde: '{js}'"
|
||||
logger.error(errmsg)
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
|
||||
set_aibusy(0)
|
||||
return
|
||||
|
||||
finished = req_status["done"]
|
||||
|
||||
if not finished:
|
||||
logger.debug(req_status)
|
||||
time.sleep(1)
|
||||
|
||||
logger.debug("Last Horde Status Message: {}".format(js))
|
||||
if req_status["faulted"]:
|
||||
errmsg = "Horde Text generation faulted! Please try again"
|
||||
logger.error(errmsg)
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
|
||||
set_aibusy(0)
|
||||
return
|
||||
|
||||
generations = req_status['generations']
|
||||
gen_workers = [(cgen['worker_name'],cgen['worker_id']) for cgen in generations]
|
||||
logger.info(f"Generations by: {gen_workers}")
|
||||
|
||||
# Just in case we want to announce it to the user
|
||||
if len(js) == 1:
|
||||
warnmsg = f"Text generated by {js[0]['server_name']}"
|
||||
if len(generations) == 1:
|
||||
warnmsg = f"Text generated by {[w[0] for w in gen_workers]}"
|
||||
emit('from_server', {'cmd': 'warnmsg', 'data': warnmsg}, broadcast=True)
|
||||
genout = [cgen['text'] for cgen in js]
|
||||
genout = [cgen['text'] for cgen in generations]
|
||||
|
||||
for i in range(vars.numseqs):
|
||||
vars.lua_koboldbridge.outputs[i+1] = genout[i]
|
||||
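The async flow submits the payload to /api/v2/generate/text/async and then polls /api/v2/generate/text/status/{id} until done is true, checking faulted along the way. A stripped-down client showing the same protocol (the base URL is an assumption; endpoint paths, header names and payload keys follow the code above, and the per-step error reporting is omitted):

import time
import requests

CLIENT_AGENT = "KoboldAI:1.19.3:koboldai.org"
BASE_URL = "https://horde.koboldai.net"  # assumption: the public Horde endpoint

def horde_generate(prompt, params, api_key):
    payload = {"prompt": prompt, "params": params, "trusted_workers": False}
    headers = {"apikey": api_key, "Client-Agent": CLIENT_AGENT}
    req = requests.post(BASE_URL + "/api/v2/generate/text/async", json=payload, headers=headers)
    req.raise_for_status()
    request_id = req.json()["id"]
    # Poll the status endpoint once per second until the job finishes.
    while True:
        status = requests.get(BASE_URL + "/api/v2/generate/text/status/" + request_id,
                              headers={"Client-Agent": CLIENT_AGENT}).json()
        if status.get("faulted"):
            raise RuntimeError("Horde text generation faulted")
        if status.get("done"):
            return [gen["text"] for gen in status["generations"]]
        time.sleep(1)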
|
@ -5405,7 +5518,7 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
|
|||
vars.lua_running = False
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True)
|
||||
sendUSStatItems()
|
||||
logger.debug('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.error('LUA ERROR: ' + str(e).replace("\033", ""))
|
||||
logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.")
|
||||
else:
|
||||
emit('from_server', {'cmd': 'errmsg', 'data': 'Error occurred during generator call; please check console.'}, broadcast=True)
|
||||
|
@ -7450,6 +7563,13 @@ def story_load_validator(name: str):
|
|||
raise ValidationError("Must be a valid story name.")
|
||||
return True
|
||||
|
||||
def permutation_validator(lst: list):
|
||||
if any(not isinstance(e, int) for e in lst):
|
||||
return
|
||||
if min(lst) != 0 or max(lst) != len(lst) - 1 or len(set(lst)) != len(lst):
|
||||
raise ValidationError("Must be a permutation of the first N non-negative integers, where N is the length of this array")
|
||||
return True
|
||||
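A few illustrative calls make the accepted shape concrete (example inputs are assumptions about usage, not part of the schema code):

permutation_validator([6, 0, 1, 2, 3, 4, 5])  # passes: a permutation of 0..6
permutation_validator([0, 1, 1, 3])           # raises ValidationError: 1 repeats, 2 is missing
permutation_validator([1, 2, 3])              # raises ValidationError: does not start at 0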
|
||||
class GenerationInputSchema(SamplerSettingsSchema):
|
||||
prompt: str = fields.String(required=True, metadata={"description": "This is the submission."})
|
||||
use_memory: bool = fields.Boolean(load_default=False, metadata={"description": "Whether or not to use the memory from the KoboldAI GUI when generating text."})
|
||||
|
@ -7469,6 +7589,9 @@ class GenerationInputSchema(SamplerSettingsSchema):
|
|||
disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
|
||||
frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
|
||||
quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."})
|
||||
sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 6 and the array must be a permutation of the first N non-negative integers."})
|
||||
sampler_seed: Optional[int] = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."})
|
||||
sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."})
|
||||
|
||||
class GenerationResultSchema(KoboldSchema):
|
||||
text: str = fields.String(required=True, metadata={"description": "Generated output as plain text."})
|
||||
|
@ -7559,6 +7682,29 @@ def _generate_text(body: GenerationInputSchema):
|
|||
"msg": "Server is busy; please try again later.",
|
||||
"type": "service_unavailable",
|
||||
}}), mimetype="application/json", status=503))
|
||||
if vars.use_colab_tpu:
|
||||
import tpu_mtj_backend
|
||||
if hasattr(body, "sampler_seed"):
|
||||
# If a seed was specified, we need to save the global RNG state so we
|
||||
# can restore it later
|
||||
old_seed = vars.seed
|
||||
old_rng_state = tpu_mtj_backend.get_rng_state() if vars.use_colab_tpu else torch.get_rng_state()
|
||||
vars.seed = body.sampler_seed
|
||||
# We should try to use a previously saved RNG state with the same seed
|
||||
if body.sampler_seed in vars.rng_states:
|
||||
if vars.use_colab_tpu:
|
||||
tpu_mtj_backend.set_rng_state(vars.rng_states[body.sampler_seed])
|
||||
else:
|
||||
torch.set_rng_state(vars.rng_states[body.sampler_seed])
|
||||
else:
|
||||
if vars.use_colab_tpu:
|
||||
tpu_mtj_backend.set_rng_state(tpu_mtj_backend.new_rng_state(body.sampler_seed))
|
||||
else:
|
||||
torch.manual_seed(body.sampler_seed)
|
||||
vars.rng_states[body.sampler_seed] = tpu_mtj_backend.get_rng_state() if vars.use_colab_tpu else torch.get_rng_state()
|
||||
if hasattr(body, "sampler_order"):
|
||||
if len(body.sampler_order) < 7:
|
||||
body.sampler_order = [6] + body.sampler_order
|
||||
# This maps each property used when sending the generate request
|
||||
# to the object which typically contains its value.
|
||||
# This allows setting the property only for the API generation, and then reverting it afterwards.
|
||||
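The sampler_seed branch keeps one saved RNG state per seed in vars.rng_states, so repeated API calls with the same seed continue a deterministic sequence instead of restarting it. A PyTorch-only sketch of that caching pattern (activate_seed is an illustrative helper; the code above additionally handles the TPU backend via tpu_mtj_backend):

import torch

rng_states = {}  # seed -> saved RNG state, mirroring vars.rng_states

def activate_seed(seed):
    # Resume the sequence for a previously used seed if we saved its state;
    # otherwise start a fresh deterministic sequence for this seed.
    if seed in rng_states:
        torch.set_rng_state(rng_states[seed])
    else:
        torch.manual_seed(seed)
    rng_states[seed] = torch.get_rng_state()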
|
@ -7584,6 +7730,8 @@ def _generate_text(body: GenerationInputSchema):
|
|||
"max_context_length": ("vars", "max_length", None),
|
||||
"n": ("vars", "numseqs", None),
|
||||
"quiet": ("vars", "quiet", None),
|
||||
"sampler_order": ("vars", "sampler_order", None),
|
||||
"sampler_full_determinism": ("vars", "full_determinism", None),
|
||||
}
|
||||
saved_settings = {}
|
||||
set_aibusy(1)
|
||||
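Each entry maps an API property name to an (object name, attribute, postprocess) tuple describing where the value normally lives. A sketch of how such a table supports request-scoped overrides (apply_api_overrides and restore_api_overrides are illustrative helpers, not the project's functions):

def apply_api_overrides(body, mapping, store):
    # Apply request-scoped overrides described by the mapping and remember
    # the old values so they can be restored after generation.
    saved = {}
    for prop, (_obj, attr, _postprocess) in mapping.items():
        if hasattr(body, prop):
            saved[attr] = getattr(store, attr)
            setattr(store, attr, getattr(body, prop))
    return saved

def restore_api_overrides(saved, store):
    for attr, value in saved.items():
        setattr(store, attr, value)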
|
@ -7633,6 +7781,12 @@ def _generate_text(body: GenerationInputSchema):
|
|||
vars.output_streaming = output_streaming
|
||||
if vars.allowsp and getattr(body, "soft_prompt", None) is not None:
|
||||
spRequest(old_spfilename)
|
||||
if hasattr(body, "sampler_seed"):
|
||||
vars.seed = old_seed
|
||||
if vars.use_colab_tpu:
|
||||
tpu_mtj_backend.set_rng_state(old_rng_state)
|
||||
else:
|
||||
torch.set_rng_state(old_rng_state)
|
||||
set_aibusy(0)
|
||||
return output
|
||||
|
||||
|
@ -7816,7 +7970,7 @@ def prompt_validator(prompt: str):
|
|||
raise ValidationError("String does not match expected pattern.")
|
||||
|
||||
class SubmissionInputSchema(KoboldSchema):
|
||||
prompt: str = fields.String(required=True, validate=prompt_validator, metadata={"pattern": r"^.*\S.*$", "description": "This is the submission."})
|
||||
prompt: str = fields.String(required=True, validate=prompt_validator, metadata={"pattern": r"^[\S\s]*\S[\S\s]*$", "description": "This is the submission."})
|
||||
disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, disables all input formatting options, overriding their individual enabled/disabled states."})
|
||||
frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action."})
|
||||
|
||||
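The widened pattern matters for multi-line prompts: . does not match newlines by default, while the character class [\S\s] matches any character. A quick demonstration (illustrative only; the actual validation happens in prompt_validator):

import re

old_pattern = re.compile(r"^.*\S.*$")
new_pattern = re.compile(r"^[\S\s]*\S[\S\s]*$")

print(bool(old_pattern.match("line one\nline two")))  # False: '.' and '$' stop at the newline
print(bool(new_pattern.match("line one\nline two")))  # True: [\S\s] also matches newlines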
|
@ -9838,6 +9992,60 @@ def put_config_soft_prompt(body: SoftPromptSettingSchema):
|
|||
settingschanged()
|
||||
return {}
|
||||
|
||||
class SamplerSeedSettingSchema(KoboldSchema):
|
||||
value: int = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), required=True)
|
||||
|
||||
@api_v1.get("/config/sampler_seed")
|
||||
@api_schema_wrap
|
||||
def get_config_sampler_seed():
|
||||
"""---
|
||||
get:
|
||||
summary: Retrieve the current global sampler seed value
|
||||
tags:
|
||||
- config
|
||||
responses:
|
||||
200:
|
||||
description: Successful request
|
||||
content:
|
||||
application/json:
|
||||
schema: SamplerSeedSettingSchema
|
||||
example:
|
||||
value: 3475097509890965500
|
||||
"""
|
||||
return {"value": __import__("tpu_mtj_backend").get_rng_seed() if vars.use_colab_tpu else __import__("torch").initial_seed()}
|
||||
|
||||
@api_v1.put("/config/sampler_seed")
|
||||
@api_schema_wrap
|
||||
def put_config_sampler_seed(body: SamplerSeedSettingSchema):
|
||||
"""---
|
||||
put:
|
||||
summary: Set the global sampler seed value
|
||||
tags:
|
||||
- config
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema: SamplerSeedSettingSchema
|
||||
example:
|
||||
value: 3475097509890965500
|
||||
responses:
|
||||
200:
|
||||
description: Successful request
|
||||
content:
|
||||
application/json:
|
||||
schema: EmptySchema
|
||||
{api_validation_error_response}
|
||||
"""
|
||||
if vars.use_colab_tpu:
|
||||
import tpu_mtj_backend
|
||||
tpu_mtj_backend.set_rng_seed(body.value)
|
||||
else:
|
||||
import torch
|
||||
torch.manual_seed(body.value)
|
||||
vars.seed = body.value
|
||||
return {}
|
||||
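Once the server is up, the pair of endpoints can be exercised with any HTTP client; a sketch using requests (host and port are assumptions):

import requests

base = "http://localhost:5000/api/v1"  # assumption: default local address/port
requests.put(base + "/config/sampler_seed", json={"value": 3475097509890965500})
print(requests.get(base + "/config/sampler_seed").json())  # expect {'value': 3475097509890965500}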
|
||||
config_endpoint_schemas: List[Type[KoboldSchema]] = []
|
||||
|
||||
def config_endpoint_schema(c: Type[KoboldSchema]):
|
||||
|
@ -10035,6 +10243,25 @@ class AddSentenceSpacingSettingsSchema(KoboldSchema):
|
|||
name = "add sentence spacing (input formatting)"
|
||||
example_yaml_value = "false"
|
||||
|
||||
@config_endpoint_schema
|
||||
class SamplerOrderSettingSchema(KoboldSchema):
|
||||
value = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], required=True)
|
||||
class KoboldMeta:
|
||||
route_name = "sampler_order"
|
||||
obj = "vars"
|
||||
var_name = "sampler_order"
|
||||
name = "sampler order"
|
||||
example_yaml_value = "[6, 0, 1, 2, 3, 4, 5]"
|
||||
|
||||
@config_endpoint_schema
|
||||
class SamplerFullDeterminismSettingSchema(KoboldSchema):
|
||||
value = fields.Boolean(required=True)
|
||||
class KoboldMeta:
|
||||
route_name = "sampler_full_determinism"
|
||||
obj = "vars"
|
||||
var_name = "full_determinism"
|
||||
name = "sampler full determinism"
|
||||
example_yaml_value = "false"
|
||||
|
||||
|
||||
for schema in config_endpoint_schemas:
|
||||
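Each schema's KoboldMeta names a route, a target object and a variable; the loop that begins here (truncated in this diff) presumably turns each registered schema into a matching GET/PUT /config/{route_name} endpoint pair. A hypothetical reconstruction of that idea, with the explicit caveat that the real loop body is not shown:

def register_config_endpoint(schema_cls):
    # Hypothetical sketch: the actual loop body is not visible in this diff.
    meta = schema_cls.KoboldMeta
    target = globals()[meta.obj]  # e.g. the global "vars" settings object
    def getter():
        return {"value": getattr(target, meta.var_name)}
    def putter(body):
        setattr(target, meta.var_name, body.value)
        return {}
    return "/config/" + meta.route_name, getter, putter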
|
|
231
colab/GPU.ipynb
|
@ -1,23 +1,4 @@
|
|||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"name": "ColabKobold GPU",
|
||||
"private_outputs": true,
|
||||
"provenance": [],
|
||||
"collapsed_sections": [],
|
||||
"include_colab_link": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"accelerator": "GPU"
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
@ -35,52 +16,99 @@
|
|||
"id": "kX9y5koxa58q"
|
||||
},
|
||||
"source": [
|
||||
"## [You can get faster generations and higher context with our Koboldcpp Notebook](https://koboldai.org/colabcpp)\n",
|
||||
"\n",
|
||||
"# Welcome to KoboldAI on Google Colab, GPU Edition!\n",
|
||||
"KoboldAI is a powerful and easy way to use a variety of AI based text generation experiences. You can use it to write stories, blog posts, play a text adventure game, use it like a chatbot and more! In some cases it might even help you with an assignment or programming task (But always make sure the information the AI mentions is correct, it loves to make stuff up).\n",
|
||||
"\n",
|
||||
"For more information about KoboldAI check our our Github readme : https://github.com/KoboldAI/KoboldAI-Client/blob/main/readme.md\n",
|
||||
"\n",
|
||||
"For the larger AI models (That are typically more coherent) check out our **[TPU edition](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)**!"
|
||||
"---\n",
|
||||
"## How to load KoboldAI: Everything you need to know\n",
|
||||
"1. On a phone? First put your browser in desktop mode because of a Google Colab bug. Otherwise nothing will happen when you click the play button. Then tap the play button next to \"<-- Tap This if you play on Mobile\", you will see an audio player. Keep the audio player playing so Colab does not get shut down in the background.\n",
|
||||
"2. Select the desired model, you will find a description of all the available models further down the page.\n",
|
||||
"3. Click the play button next to \"<-- Select your model below and then click this to start KoboldAI\".\n",
|
||||
"4. Got a message saying no accelerator is available? Click cancel, and try again in a few minutes. If you do not manage to get a session when you frequently try again try at a different time of day, colab can be busy or your priority may have been lowered by frequent usage.\n",
|
||||
"5. After everything is done loading you will get a link that you can use to open KoboldAI. In case of Localtunnel you will also be warned that some people are abusing Localtunnel for phishing, once you acknowledge this warning you will be taken to KoboldAI's interface. If you picked Cloudflare and get a 1033 error refresh the error page after waiting one minute.\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Further down the page you can find descriptions of the models, and tips to get the most out of your Google Colab experience.\n",
|
||||
"\n",
|
||||
"Make sure to keep this page open while you are using KoboldAI, and check back regularly to see if you got a Captcha. Failure to complete the captcha's in time can result in termination of your session or a lower priority towards the TPUs.\n",
|
||||
"\n",
|
||||
"Firefox users need to disable the enhanced tracking protection or use a different browser in order to be able to use Google Colab without errors (This is not something we can do anything about, the cookie blocker breaks the Google Drive integration because it uses different domains)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "ewkXkyiFP2Hq"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title <-- Tap this if you play on Mobile { display-mode: \"form\" }\n",
|
||||
"%%html\n",
|
||||
"<b>Press play on the music player to keep the tab alive, then start KoboldAI below (Uses only 13MB of data)</b><br/>\n",
|
||||
"<audio src=\"https://henk.tech/colabkobold/silence.m4a\" controls>"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
"<audio src=\"https://raw.githubusercontent.com/KoboldAI/KoboldAI-Client/main/colab/silence.m4a\" controls>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "lVftocpwCoYw",
|
||||
"cellView": "form"
|
||||
"cellView": "form",
|
||||
"id": "lVftocpwCoYw"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title <b><-- Select your model below and then click this to start KoboldAI</b>\n",
|
||||
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
|
||||
"\n",
|
||||
"Model = \"Nerys 2.7B\" #@param [\"Nerys 2.7B\", \"AID 2.7B\", \"Erebus 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"Horni LN 2.7B\", \"Horni 2.7B\", \"Shinen 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n",
|
||||
"Model = \"Nerys V2 6B\" #@param [\"Tiefighter 13B (United)\", \"Echidna 13B (United)\", \"HoloMax 13B (United)\", \"Emerhyst 13B (United)\", \"MythoMax 13B (United)\", \"Huginn 13B (United)\", \"Chronos 13B (United)\", \"Airoboros M2.0 13B (United)\", \"Holodeck 13B (United)\", \"Spring Dragon 13B (United)\", \"Nerys V2 6B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Nerys 2.7B\", \"AID 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n",
|
||||
"Revision = \"\" #@param [\"\"]{allow-input: true}\n",
|
||||
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
|
||||
"Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
|
||||
"Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
|
||||
"use_google_drive = True #@param {type:\"boolean\"}\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"if not os.path.isfile(\"/opt/bin/nvidia-smi\"):\n",
|
||||
" raise RuntimeError(\"⚠️Colab did not give you a GPU due to usage limits, this can take a few hours before they let you back in. Check out https://lite.koboldai.net for a free alternative (that does not provide an API link but can load KoboldAI saves and chat cards) or subscribe to Colab Pro for immediate access.⚠️\")\n",
|
||||
"\n",
|
||||
"!nvidia-smi\n",
|
||||
"from google.colab import drive\n",
|
||||
"drive.mount('/content/drive/')\n",
|
||||
"if use_google_drive:\n",
|
||||
" drive.mount('/content/drive/')\n",
|
||||
"else:\n",
|
||||
" import os\n",
|
||||
" if not os.path.exists(\"/content/drive\"):\n",
|
||||
" os.mkdir(\"/content/drive\")\n",
|
||||
" if not os.path.exists(\"/content/drive/MyDrive/\"):\n",
|
||||
" os.mkdir(\"/content/drive/MyDrive/\")\n",
|
||||
"\n",
|
||||
"if Model == \"Nerys 2.7B\":\n",
|
||||
" Model = \"KoboldAI/fairseq-dense-2.7B-Nerys\"\n",
|
||||
"if Model == \"Nerys V2 6B\":\n",
|
||||
" Model = \"KoboldAI/OPT-6B-nerys-v2\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Erebus 2.7B\":\n",
|
||||
" Model = \"KoboldAI/OPT-2.7B-Erebus\"\n",
|
||||
"elif Model == \"Skein 6B\":\n",
|
||||
" Model = \"KoboldAI/GPT-J-6B-Skein\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Janeway 6B\":\n",
|
||||
" Model = \"KoboldAI/GPT-J-6B-Janeway\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Adventure 6B\":\n",
|
||||
" Model = \"KoboldAI/GPT-J-6B-Adventure\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Shinen 6B\":\n",
|
||||
" Model = \"KoboldAI/GPT-J-6B-Shinen\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Nerys 2.7B\":\n",
|
||||
" Model = \"KoboldAI/fairseq-dense-2.7B-Nerys\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Janeway 2.7B\":\n",
|
||||
|
@ -95,18 +123,6 @@
|
|||
" Model = \"KoboldAI/GPT-Neo-2.7B-AID\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Horni LN 2.7B\":\n",
|
||||
" Model = \"KoboldAI/GPT-Neo-2.7B-Horni-LN\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Horni 2.7B\":\n",
|
||||
" Model = \"KoboldAI/GPT-Neo-2.7B-Horni\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Shinen 2.7B\":\n",
|
||||
" Model = \"KoboldAI/GPT-Neo-2.7B-Shinen\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Fairseq Dense 2.7B\":\n",
|
||||
" Model = \"KoboldAI/fairseq-dense-2.7B\"\n",
|
||||
" path = \"\"\n",
|
||||
|
@ -119,55 +135,95 @@
|
|||
" Model = \"EleutherAI/gpt-neo-2.7B\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Tiefighter 13B (United)\":\n",
|
||||
" Model = \"KoboldAI/LLaMA2-13B-Tiefighter\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"elif Model == \"Echidna 13B (United)\":\n",
|
||||
" Model = \"NeverSleep/Echidna-13b-v0.3\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"elif Model == \"Huginn 13B (United)\":\n",
|
||||
" Model = \"The-Face-Of-Goonery/Huginn-13b-v1.2\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"elif Model == \"Chronos 13B (United)\":\n",
|
||||
" Model = \"elinas/chronos-13b-v2\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"elif Model == \"Airoboros M2.0 13B (United)\":\n",
|
||||
" Model = \"jondurbin/airoboros-l2-13b-gpt4-m2.0\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"elif Model == \"Emerhyst 13B (United)\":\n",
|
||||
" Model = \"Undi95/Emerhyst-13B\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"elif Model == \"MythoMax 13B (United)\":\n",
|
||||
" Model = \"Gryphe/MythoMax-L2-13b\"\n",
|
||||
" Revision = \"\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"elif Model == \"Spring Dragon 13B (United)\":\n",
|
||||
" Model = \"Henk717/spring-dragon\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"elif Model == \"Holodeck 13B (United)\":\n",
|
||||
" Model = \"KoboldAI/LLAMA2-13B-Holodeck-1\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"elif Model == \"HoloMax 13B (United)\":\n",
|
||||
" Model = \"KoboldAI/LLaMA2-13B-Holomax\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
" Version = \"United\"\n",
|
||||
"\n",
|
||||
"if Provider == \"Localtunnel\":\n",
|
||||
" tunnel = \"--localtunnel yes\"\n",
|
||||
"else:\n",
|
||||
" tunnel = \"\"\n",
|
||||
"\n",
|
||||
"!wget https://koboldai.org/ckds -O - | bash /dev/stdin -m $Model -g $Version $tunnel"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
"!wget https://koboldai.org/ckds -O - | bash /dev/stdin -m $Model -g $Version $Revision $tunnel"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Lrm840I33hkC"
|
||||
},
|
||||
"source": [
|
||||
"# GPU Edition Model Descriptions\n",
|
||||
"| Model | Style | Description |\n",
|
||||
"| --- | --- | --- |\n",
|
||||
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
|
||||
"| [Erebus](https://huggingface.co/KoboldAI/OPT-2.7B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model, being a combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys and featuring thourough tagging support it covers the vast majority of erotic writing styles. This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non research usage we recommend choosing the 20B version as that one is not subject to the restrictive OPT license. |\n",
|
||||
"| [Tiefighter 13B by KoboldAI](https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter) | Hybrid | Tiefighter 13B is a very versitile fiction Hybrid, it can write, chat and play adventure games and can also answer regular instructions (Although we do not recommend this model for factual use due to its fictional nature). This is an excellent starting model, for the best results avoid using Second person writing in your chats unless you are wanting it to become a text adventure.|\n",
|
||||
"| [Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
||||
"| [Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | Novel | Picard is a model trained for SFW Novels based on Neo 2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||
"| [AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
|
||||
"| [Horni LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | Novel | This model is based on Horni 2.7B and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
|
||||
"| [Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||
"| [Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||
"| [OPT](https://huggingface.co/facebook/opt-2.7b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n",
|
||||
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-2.7B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger models from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |\n",
|
||||
"| [MythoMax 13B](https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ) by Gryphe | Roleplay | An improved, potentially even perfected variant of MythoMix, my MythoLogic-L2 and Huginn merge using a highly experimental tensor type merge technique¹. |\n",
|
||||
"| [Holomax 13B by KoboldAI](https://huggingface.co/KoboldAI/LLaMA2-13B-Holomax) | Adventure | This is an expansion merge to the well-praised MythoMax model from Gryphe (60%) using MrSeeker's KoboldAI Holodeck model (40%). The goal of this model is to enhance story-writing capabilities while preserving the desirable traits of the MythoMax model as much as possible (It does limit chat reply length). |\n",
|
||||
"| [Airoboros 13B](https://huggingface.co/jondurbin/airoboros-13b) by Jon Durbin | Generic | This is an instruction fine-tuned llama-2 model, using synthetic instructions generated by airoboros⁵. |\n",
|
||||
"| [Emerhyst 13B](https://huggingface.co/Undi95/Emerhyst-13B) by Undi | Roleplay | An attempt using BlockMerge_Gradient to get better result. In addition, LimaRP v3 was used⁷. |\n",
|
||||
"| [Chronos 13B](https://huggingface.co/elinas/chronos-13b) by Elinas | Generic | This model is primarily focused on chat, roleplay, and storywriting, but can accomplish other tasks such as simple reasoning and coding. Chronos generates very long outputs with coherent text, largely due to the human inputs it was trained on. |\n",
|
||||
"| [Spring Dragon by Henk717](https://huggingface.co/Henk717/spring-dragon) | Adventure | This model is a recreation attempt of the AI Dungeon 2 Dragon model. To achieve this, the \"text_adventures.txt\" dataset was used, which was bundled with the original AI Dungeon 2 GitHub release prior to the online service. It is worth noting that the same dataset file was used to create the Dragon model, where Dragon is a GPT-3 175B Davinci model from 2020. |\n",
|
||||
"| [Holodeck By KoboldAI](https://huggingface.co/KoboldAI/LLAMA2-13B-Holodeck-1) | Adventure |LLAMA2 13B-Holodeck is a finetune created using Meta's llama 2 model.The training data contains around 3000 ebooks in various genres. Most parts of the dataset have been prepended using the following text: [Genre: <genre1>, <genre2>|\n",
|
||||
"| [Neo](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
|
||||
"\n",
|
||||
"# [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)\n",
|
||||
"\n",
|
||||
"| Model | Style | Description |\n",
|
||||
"| --- | --- | --- |\n",
|
||||
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
|
||||
"| [Erebus](https://huggingface.co/KoboldAI/OPT-13B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model, being a combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys and featuring thourough tagging support it covers the vast majority of erotic writing styles. This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non research usage we recommend choosing the 20B version as that one is not subject to the restrictive OPT license. |\n",
|
||||
"| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
||||
"| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | NSFW | Shinen is an NSFW model trained on a variety of stories from the website Sexstories it contains many different kinks. It has been merged into the larger (and better) Erebus model. |\n",
|
||||
"| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
|
||||
"| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
|
||||
"| [Lit](https://huggingface.co/hakurei/lit-6B) ([V2](https://huggingface.co/hakurei/litv2-6B-rev3)) by Haru | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
|
||||
"| [OPT](https://huggingface.co/facebook/opt-13b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n",
|
||||
"| [Neo(X)](https://huggingface.co/EleutherAI/gpt-neox-20b) by EleutherAI | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n",
|
||||
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |\n",
|
||||
"| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
|
||||
"\n",
|
||||
"| Style | Description |\n",
|
||||
"| --------- | ------------------------------------------------------------ |\n",
|
||||
"| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n",
|
||||
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
|
||||
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
|
||||
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
|
||||
"\n",
|
||||
|
@ -183,10 +239,39 @@
|
|||
"7. As you play KoboldAI, keep this Colab tab open in the background and check occationally for Captcha's so they do not shut your instance down. If you do get shut down you can always download a copy of your gamesave in the Save menu inside KoboldAI. Stories are never lost as long as you keep KoboldAI open in your browser.\n",
|
||||
"\n",
|
||||
"Get a error message saying you do not have access to a GPU/TPU instance? Do not continue and try again later, KoboldAI will not run correctly without them."
|
||||
],
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "Lrm840I33hkC"
|
||||
}
|
||||
"cellView": "form",
|
||||
"id": "5k8fK4F6UiTs"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title <b>Model Cleaner</b>\n",
|
||||
"#@markdown Out of space? Run this to remove all cached models (Google Drive models are not effected).\n",
|
||||
"!rm -rf /content/KoboldAI-Client/cache/*\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"name": "ColabKobold GPU",
|
||||
"private_outputs": true,
|
||||
"provenance": [],
|
||||
"include_colab_link": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
|
@ -46,7 +46,7 @@
|
|||
"#@title <-- Tap this if you play on Mobile { display-mode: \"form\" }\n",
|
||||
"%%html\n",
|
||||
"<b>Press play on the music player to keep the tab alive, then start KoboldAI below (Uses only 13MB of data)</b><br/>\n",
|
||||
"<audio src=\"https://henk.tech/colabkobold/silence.m4a\" controls>"
|
||||
"<audio src=\"https://raw.githubusercontent.com/KoboldAI/KoboldAI-Client/main/colab/silence.m4a\" controls>"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "ZIL7itnNaw5V"
|
||||
|
@ -66,9 +66,10 @@
|
|||
"#@title <b><-- Select your model below and then click this to start KoboldAI</b>\n",
|
||||
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
|
||||
"\n",
|
||||
"Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
|
||||
"Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Janeway 13B\", \"Skein 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
|
||||
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
|
||||
"Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
|
||||
"Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
|
||||
"use_google_drive = True #@param {type:\"boolean\"}\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"try:\n",
|
||||
|
@ -79,7 +80,16 @@
|
|||
" raise RuntimeError(\"⚠️You can not run this notebook without the TPU accelerator, go to Runtime->Sessions, terminate your session and then try again.⚠️\")\n",
|
||||
"print('Now we will need your Google Drive to store settings and saves, you must login with the same account you used for Colab.')\n",
|
||||
"from google.colab import drive\n",
|
||||
"drive.mount('/content/drive/')\n",
|
||||
"if use_google_drive:\n",
|
||||
" drive.mount('/content/drive/')\n",
|
||||
"else:\n",
|
||||
" import os\n",
|
||||
" if not os.path.exists(\"/content/drive\"):\n",
|
||||
" os.mkdir(\"/content/drive\")\n",
|
||||
" if not os.path.exists(\"/content/drive/MyDrive/\"):\n",
|
||||
" os.mkdir(\"/content/drive/MyDrive/\")\n",
|
||||
"\n",
|
||||
"Revision = \"\"\n",
|
||||
"\n",
|
||||
"if Model == \"Janeway 13B\":\n",
|
||||
" Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n",
|
||||
|
@ -89,18 +99,6 @@
|
|||
" Model = \"KoboldAI/OPT-13B-Nerys-v2\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Erebus 13B\":\n",
|
||||
" Model = \"KoboldAI/OPT-13B-Erebus\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Shinen 13B\":\n",
|
||||
" Model = \"KoboldAI/fairseq-dense-13B-Shinen\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Erebus 20B\":\n",
|
||||
" Model = \"KoboldAI/GPT-NeoX-20B-Erebus\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Skein 20B\":\n",
|
||||
" Model = \"KoboldAI/GPT-NeoX-20B-Skein\"\n",
|
||||
" path = \"\"\n",
|
||||
|
@ -121,18 +119,6 @@
|
|||
" Model = \"KoboldAI/GPT-J-6B-Adventure\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Lit V2 6B\":\n",
|
||||
" Model = \"hakurei/litv2-6B-rev3\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Lit 6B\":\n",
|
||||
" Model = \"hakurei/lit-6B\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"Shinen 6B\":\n",
|
||||
" Model = \"KoboldAI/GPT-J-6B-Shinen\"\n",
|
||||
" path = \"\"\n",
|
||||
" download = \"\"\n",
|
||||
"elif Model == \"OPT 13B\":\n",
|
||||
" Model = \"facebook/opt-13b\"\n",
|
||||
" path = \"\"\n",
|
||||
|
@ -154,7 +140,7 @@
|
|||
"else:\n",
|
||||
" tunnel = \"\"\n",
|
||||
"\n",
|
||||
"!wget https://koboldai.org/ckds -O - | bash /dev/stdin $path$download -m $Model -g $Version $tunnel"
|
||||
"!wget https://koboldai.org/ckds -O - | bash /dev/stdin $path$download -m $Model -g $Version $tunnel $Revision"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -165,12 +151,9 @@
|
|||
"| Model | Style | Description |\n",
|
||||
"| --- | --- | --- |\n",
|
||||
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
|
||||
"| [Erebus](https://huggingface.co/KoboldAI/OPT-13B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model, being a combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys and featuring thourough tagging support it covers the vast majority of erotic writing styles. This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non research usage we recommend choosing the 20B version as that one is not subject to the restrictive OPT license. |\n",
|
||||
"| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
||||
"| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | NSFW | Shinen is an NSFW model trained on a variety of stories from the website Sexstories it contains many different kinks. It has been merged into the larger (and better) Erebus model. |\n",
|
||||
"| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
|
||||
"| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
|
||||
"| [Lit](https://huggingface.co/hakurei/lit-6B) ([V2](https://huggingface.co/hakurei/litv2-6B-rev3)) by Haru | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
|
||||
"| [OPT](https://huggingface.co/facebook/opt-13b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n",
|
||||
"| [Neo(X)](https://huggingface.co/EleutherAI/gpt-neox-20b) by EleutherAI | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n",
|
||||
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |\n",
|
||||
|
@ -181,13 +164,9 @@
|
|||
"| Model | Style | Description |\n",
|
||||
"| --- | --- | --- |\n",
|
||||
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
|
||||
"| [Erebus](https://huggingface.co/KoboldAI/OPT-2.7B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model, being a combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys and featuring thourough tagging support it covers the vast majority of erotic writing styles. This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non research usage we recommend choosing the 20B version as that one is not subject to the restrictive OPT license. |\n",
|
||||
"| [Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
||||
"| [Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | Novel | Picard is a model trained for SFW Novels based on Neo 2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||
"| [AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
|
||||
"| [Horni LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | Novel | This model is based on Horni 2.7B and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
|
||||
"| [Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||
"| [Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||
"| [OPT](https://huggingface.co/facebook/opt-2.7b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n",
|
||||
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-2.7B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger models from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |\n",
|
||||
"| [Neo](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
|
||||
|
@ -196,7 +175,6 @@
|
|||
"| Style | Description |\n",
|
||||
"| --- | --- |\n",
|
||||
"| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n",
|
||||
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
|
||||
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
|
||||
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
|
||||
"\n",
|
||||
|
@ -232,7 +210,6 @@
|
|||
"name": "ColabKobold TPU",
|
||||
"provenance": [],
|
||||
"private_outputs": true,
|
||||
"collapsed_sections": [],
|
||||
"include_colab_link": true
|
||||
},
|
||||
"kernelspec": {
|
||||
|
|
Binary file not shown.
|
@ -1,5 +1,7 @@
|
|||
@echo off
|
||||
cd /D %~dp0
|
||||
SET CONDA_SHLVL=
|
||||
|
||||
TITLE CMD for KoboldAI Runtime
|
||||
SET /P M=<loader.settings
|
||||
IF %M%==1 GOTO drivemap
|
||||
|
|
|
@ -6,4 +6,4 @@ WORKDIR /content/
|
|||
COPY env.yml /home/micromamba/env.yml
|
||||
RUN micromamba install -y -n base -f /home/micromamba/env.yml
|
||||
USER root
|
||||
RUN apt update && apt install xorg -y
|
||||
RUN apt update && apt install xorg aria2 -y
|
||||
|
|
|
@ -5,6 +5,8 @@ services:
|
|||
environment:
|
||||
- DISPLAY=${DISPLAY}
|
||||
network_mode: "host"
|
||||
security_opt:
|
||||
- label:disable
|
||||
volumes:
|
||||
- /tmp/.X11-unix:/tmp/.X11-unix
|
||||
- /etc/protocols:/etc/protocols:ro
|
||||
|
|
|
@ -3,4 +3,4 @@ WORKDIR /content/
|
|||
COPY env.yml /home/micromamba/env.yml
|
||||
RUN micromamba install -y -n base -f /home/micromamba/env.yml
|
||||
USER root
|
||||
RUN apt update && apt install xorg libsqlite3-0 -y
|
||||
RUN apt update && apt install xorg libsqlite3-0 aria2 -y
|
||||
|
|
|
@ -5,6 +5,8 @@ services:
|
|||
environment:
|
||||
- DISPLAY=${DISPLAY}
|
||||
network_mode: "host"
|
||||
security_opt:
|
||||
- label:disable
|
||||
volumes:
|
||||
- /tmp/.X11-unix:/tmp/.X11-unix
|
||||
- /etc/protocols:/etc/protocols:ro
|
||||
|
|
|
@@ -1,6 +1,6 @@
 FROM debian
 RUN apt update && apt install wget aria2 git bzip2 -y
-RUN git clone https://github.com/henk717/koboldai /opt/koboldai
+RUN git clone https://github.com/koboldai/koboldai-client /opt/koboldai
 WORKDIR /opt/koboldai
 RUN ./install_requirements.sh cuda
 COPY docker-helper.sh /opt/koboldai/docker-helper.sh
@@ -9,7 +9,7 @@ if [[ ! -v KOBOLDAI_DATADIR ]];then
 fi

 mkdir $KOBOLDAI_DATADIR/stories
-if [[ ! -v KOBOLDAI_MODELDIR ]];then
+if [[ -v KOBOLDAI_MODELDIR ]];then
 mkdir $KOBOLDAI_MODELDIR/models
 fi
 mkdir $KOBOLDAI_DATADIR/settings
@@ -28,7 +28,7 @@ rm -rf userscripts/
 rm softprompts
 rm -rf softprompts/

-if [[ ! -v KOBOLDAI_MODELDIR ]];then
+if [[ -v KOBOLDAI_MODELDIR ]];then
 rm models
 rm -rf models/
 #rm cache
@@ -39,7 +39,7 @@ ln -s $KOBOLDAI_DATADIR/stories/ stories
 ln -s $KOBOLDAI_DATADIR/settings/ settings
 ln -s $KOBOLDAI_DATADIR/softprompts/ softprompts
 ln -s $KOBOLDAI_DATADIR/userscripts/ userscripts
-if [[ ! -v KOBOLDAI_MODELDIR ]];then
+if [[ -v KOBOLDAI_MODELDIR ]];then
 ln -s $KOBOLDAI_MODELDIR/models/ models
 #ln -s $KOBOLDAI_MODELDIR/cache/ cache
 fi
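Note on the three hunks above: in bash, [[ -v VAR ]] is true when VAR is set, so the old ! -v guards ran the model-directory setup precisely when KOBOLDAI_MODELDIR was missing. A minimal Python sketch of the corrected semantics (illustration only; link_model_dir is a hypothetical helper, not part of the repo):

import os

def link_model_dir():
    modeldir = os.environ.get("KOBOLDAI_MODELDIR")  # None when the variable is unset
    if modeldir is not None:  # equivalent to bash [[ -v KOBOLDAI_MODELDIR ]]
        os.makedirs(os.path.join(modeldir, "models"), exist_ok=True)
        if not os.path.islink("models"):
            os.symlink(os.path.join(modeldir, "models"), "models")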
@@ -1,26 +0,0 @@
-name: koboldai
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - pytorch
-  - cudatoolkit=11.1
-  - tensorflow-gpu
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-cloudflared
-    - flask-ngrok
-    - lupa==1.10
@@ -5,12 +5,15 @@ channels:
   - defaults
 dependencies:
   - colorama
-  - flask-socketio
-  - flask-session
+  - flask=2.2.3
+  - flask-socketio=5.3.2
+  - flask-session=0.4.0
+  - python-socketio=5.7.2
   - pytorch=1.11.*
   - python=3.8.*
   - cudatoolkit=11.1
-  - eventlet
+  - eventlet=0.33.3
+  - dnspython=2.2.1
   - markdown
   - bleach=4.1.0
   - pip
@@ -20,9 +23,15 @@ dependencies:
   - marshmallow>=3.13
   - apispec-webframeworks
   - loguru
+  - termcolor
+  - psutil
   - pip:
-    - flask-cloudflared
+    - flask-cloudflared==0.0.10
     - flask-ngrok
+    - Werkzeug==2.3.7
     - lupa==1.10
-    - transformers>=4.20.1
+    - transformers==4.24.0
+    - huggingface_hub==0.12.1
+    - safetensors
+    - accelerate
     - git+https://github.com/VE-FORBRYDERNE/mkultra
@@ -1,25 +0,0 @@
-name: koboldai-ft
-channels:
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
-    - torch
-    - torchvision==0.11.1
-    - flask-cloudflared
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-ngrok
-    - lupa==1.10
@@ -4,10 +4,13 @@ channels:
   - defaults
 dependencies:
   - colorama
-  - flask-socketio
-  - flask-session
+  - flask=2.2.3
+  - flask-socketio=5.3.2
+  - flask-session=0.4.0
+  - python-socketio=5.7.2
   - python=3.8.*
-  - eventlet
+  - eventlet=0.33.3
+  - dnspython=2.2.1
   - markdown
   - bleach=4.1.0
   - pip
@@ -17,12 +20,17 @@ dependencies:
   - marshmallow>=3.13
   - apispec-webframeworks
   - loguru
+  - termcolor
+  - psutil
   - pip:
     - --extra-index-url https://download.pytorch.org/whl/rocm5.1.1
-    - torch
-    - torchvision
-    - flask-cloudflared
+    - torch==1.12.1+rocm5.1.1
+    - flask-cloudflared==0.0.10
     - flask-ngrok
+    - Werkzeug==2.3.7
     - lupa==1.10
-    - transformers>=4.20.1
+    - transformers==4.24.0
+    - huggingface_hub==0.12.1
+    - safetensors
+    - accelerate
     - git+https://github.com/VE-FORBRYDERNE/mkultra
@@ -86,7 +86,7 @@ def uspath(filename):
 def getstoryfiles():
     list = []
     for file in listdir("stories"):
-        if file.endswith(".json"):
+        if file.endswith(".json") and not file.endswith(".v2.json"):
             ob = {}
             ob["name"] = file.replace(".json", "")
             f = open("stories/"+file, "r")
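The tightened condition keeps UI2 companion saves out of the story list so each story appears once. A quick sketch of which filenames now pass (illustration only; is_listed is a hypothetical helper):

def is_listed(file: str) -> bool:
    # Mirrors the new check in getstoryfiles()
    return file.endswith(".json") and not file.endswith(".v2.json")

assert is_listed("MyStory.json")
assert not is_listed("MyStory.v2.json")  # UI2 companion save is skipped
assert not is_listed("notes.txt")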
@@ -8,6 +8,7 @@ echo.

+Reg add "HKLM\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d "1" /f 2>nul
 cd /D %~dp0
 SET CONDA_SHLVL=

 if exist miniconda3\ (
 echo Delete existing installation?
@@ -1,12 +1,12 @@
 #!/bin/bash
-if [[ $1 = "cuda" ]]; then
+if [[ $1 = "cuda" || $1 = "CUDA" ]]; then
 wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
 bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y
 # Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster
 bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y
 exit
 fi
-if [[ $1 = "rocm" ]]; then
+if [[ $1 = "rocm" || $1 = "ROCM" ]]; then
 wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
 bin/micromamba create -f environments/rocm.yml -r runtime -n koboldai-rocm -y
 # Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster
@@ -9,11 +9,11 @@
 },
 "static_weights": {
     "transformer.wte.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
-    "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b"}},
+    "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}},
     "transformer.ln_f.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
     "transformer.ln_f.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}},
     "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}},
-    "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b"}}
+    "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}}
 },
 "layer_weights": {
     "transformer.h.{layer}.attn.bias": {},
play.bat
@@ -1,5 +1,9 @@
 @echo off
+cd /D %~dp0
+SET CONDA_SHLVL=

+rmdir /S /Q flask_session
+
 TITLE KoboldAI - Server
 SET /P M=<loader.settings
 IF %M%==1 GOTO drivemap
File diff suppressed because it is too large
@@ -1,18 +1,25 @@
-transformers>=4.20.1
-Flask
-Flask-SocketIO
+transformers==4.24.0
+huggingface_hub==0.12.1
+Flask==2.2.3
+Flask-SocketIO==5.3.2
+Werkzeug==2.3.7
+python-socketio==5.7.2
 requests
 torch >= 1.9, < 1.13
-flask-cloudflared
+flask-cloudflared==0.0.10
 flask-ngrok
-eventlet
+eventlet==0.33.3
+dnspython==2.2.1
 lupa==1.10
 markdown
 bleach==4.1.0
 sentencepiece
 protobuf
 accelerate
-flask-session
+flask-session==0.4.0
 marshmallow>=3.13
 apispec-webframeworks
 loguru
+termcolor
+safetensors
+git+https://github.com/VE-FORBRYDERNE/mkultra
@@ -2,21 +2,26 @@ torch >= 1.9, < 1.13
 numpy
 tqdm
 requests
-dm-haiku == 0.0.5
-jax == 0.2.21
-jaxlib >= 0.1.69, <= 0.3.7
-transformers >= 4.20.1
+dm-haiku==0.0.9
+jax==0.3.25
+jaxlib==0.3.25
+chex == 0.1.5
+transformers == 4.24.0
+huggingface_hub==0.12.1
 progressbar2
 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
-flask
-Flask-SocketIO
-flask-cloudflared >= 0.0.5
+Flask==2.2.3
+Flask-SocketIO==5.3.2
+python-socketio==5.7.2
+flask-cloudflared==0.0.10
 flask-ngrok
-eventlet
+Werkzeug==2.3.7
+eventlet==0.33.3
+dnspython==2.2.1
 lupa==1.10
 markdown
 bleach==4.1.0
-flask-session
+flask-session==0.4.0
 marshmallow>=3.13
 apispec-webframeworks
 loguru
@@ -3492,28 +3492,26 @@ $(document).ready(function(){

 	// Shortcuts
 	$(window).keydown(function (ev) {
-		// Only ctrl prefixed (for now)
-		if (!ev.ctrlKey) return;
-
-		let handled = true;
-		switch (ev.key) {
-			// Ctrl+Z - Back
-			case "z":
-				button_actback.click();
-				break;
-			// Ctrl+Y - Forward
-			case "y":
-				button_actfwd.click();
-				break;
-			// Ctrl+E - Retry
-			case "e":
-				button_actretry.click();
-				break;
-			default:
-				handled = false;
+		if (ev.altKey)
+		switch (ev.key) {
+			// Alt+Z - Back
+			case "z":
+				button_actback.click();
+				break;
+			// Alt+Y - Forward
+			case "y":
+				button_actfwd.click();
+				break;
+			// Alt+R - Retry
+			case "r":
+				button_actretry.click();
+				break;
+			default:
+				return;
+		} else {
+			return;
 		}

-		if (handled) ev.preventDefault();
+		ev.preventDefault();
 	});

 	$("#anotetemplate").on("input", function() {
@@ -3796,4 +3794,4 @@ function getSelectedOptions(element) {
 		output.push(item.value);
 	}
 	return output;
-}
+}
@@ -50,9 +50,13 @@ import itertools
 import zipfile
 import pickle
 import torch
+import numpy as np
+import collections
+import _codecs
 import utils
+import os
 from torch.nn import Module
-from typing import Any, Callable, Dict, Optional, Tuple, Union
+from typing import Any, Callable, Dict, Optional, Tuple, Type, Union


 _EXTRA_STATE_KEY_SUFFIX = '_extra_state'
@@ -90,12 +94,16 @@ class LazyTensor:
     def __repr__(self):
         return self.__view(repr)

-    def materialize(self, checkpoint: Union[zipfile.ZipFile, zipfile.ZipExtFile], map_location=None, no_grad=True) -> torch.Tensor:
+    def materialize(self, checkpoint: Union[zipfile.ZipFile, zipfile.ZipExtFile], map_location=None, no_grad=True, filename="pytorch_model.bin") -> torch.Tensor:
+        filename = os.path.basename(os.path.normpath(filename)).split('.')[0]
         size = reduce(lambda x, y: x * y, self.shape, 1)
         dtype = self.dtype
         nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
         if isinstance(checkpoint, zipfile.ZipFile):
-            f = checkpoint.open(f"archive/data/{self.key}", "r")
+            try:
+                f = checkpoint.open(f"archive/data/{self.key}", "r")
+            except:
+                f = checkpoint.open(f"{filename}/data/{self.key}", "r")
             f.read(self.seek_offset)
         else:
             f = checkpoint
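Background for the fallback above: a torch checkpoint is a zip archive whose tensor data normally lives under archive/data/, but some checkpoints (sharded ones in particular) name the top-level folder after the file instead, hence the second open(). A sketch for inspecting a local checkpoint's layout (assumes a hypothetical model.bin on disk):

import os, zipfile

path = "model.bin"  # placeholder checkpoint path
prefix = os.path.basename(os.path.normpath(path)).split('.')[0]  # "model"
with zipfile.ZipFile(path) as z:
    names = z.namelist()
    # Older checkpoints: "archive/data/0"; newer/sharded ones: "model/data/0"
    print([n for n in names if n.startswith(("archive/data/", f"{prefix}/data/"))][:5])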
@@ -111,8 +119,50 @@ class LazyTensor:
         tensor._backward_hooks = self.backward_hooks
         return tensor

-class _LazyUnpickler(pickle.Unpickler):
+class RestrictedUnpickler(pickle.Unpickler):
+    def original_persistent_load(self, saved_id):
+        return super().persistent_load(saved_id)
+
+    def forced_persistent_load(self, saved_id):
+        if saved_id[0] != "storage":
+            raise pickle.UnpicklingError("`saved_id[0]` must be 'storage'")
+        return self.original_persistent_load(saved_id)
+
+    def find_class(self, module, name):
+        if module == "collections" and name == "OrderedDict":
+            return collections.OrderedDict
+        elif module == "torch._utils" and name == "_rebuild_tensor_v2":
+            return torch._utils._rebuild_tensor_v2
+        elif module == "torch" and name in (
+            "DoubleStorage",
+            "FloatStorage",
+            "HalfStorage",
+            "LongStorage",
+            "IntStorage",
+            "ShortStorage",
+            "CharStorage",
+            "ByteStorage",
+            "BoolStorage",
+            "BFloat16Storage",
+        ):
+            return getattr(torch, name)
+        elif module == "numpy.core.multiarray" and name == "scalar":
+            return np.core.multiarray.scalar
+        elif module == "numpy" and name == "dtype":
+            return np.dtype
+        elif module == "_codecs" and name == "encode":
+            return _codecs.encode
+        else:
+            # Forbid everything else.
+            qualified_name = name if module == "__builtin__" else f"{module}.{name}"
+            raise pickle.UnpicklingError(f"`{qualified_name}` is forbidden; the model you are loading probably contains malicious code")
+
+    def load(self, *args, **kwargs):
+        self.original_persistent_load = getattr(self, "persistent_load", pickle.Unpickler.persistent_load)
+        self.persistent_load = self.forced_persistent_load
+        return super().load(*args, **kwargs)
+
+class _LazyUnpickler(RestrictedUnpickler):
     lazy_loaded_storages: Dict[str, LazyTensor]

     def __init__(self, *args, **kwargs):
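The allowlist in find_class is the heart of the protection: pickle calls find_class for every global a payload references, so refusing unknown globals stops os.system-style payloads before any opcode that would call them runs. A self-contained sketch of the same idea, deliberately smaller than the class above (AllowlistUnpickler is illustrative, not the repo's class):

import collections
import io
import pickle

class AllowlistUnpickler(pickle.Unpickler):
    ALLOWED = {("collections", "OrderedDict"): collections.OrderedDict}

    def find_class(self, module, name):
        try:
            return self.ALLOWED[(module, name)]
        except KeyError:
            raise pickle.UnpicklingError(f"`{module}.{name}` is forbidden")

safe = pickle.dumps(collections.OrderedDict(a=1))
print(AllowlistUnpickler(io.BytesIO(safe)).load())  # OrderedDict([('a', 1)])

evil = b"cos\nsystem\n(S'echo pwned'\ntR."  # classic os.system pickle payload
try:
    AllowlistUnpickler(io.BytesIO(evil)).load()
except pickle.UnpicklingError as e:
    print(e)  # `os.system` is forbidden; nothing was executed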
@@ -127,7 +177,6 @@ class _LazyUnpickler(pickle.Unpickler):
         return LazyTensor(storage_type, key, location)

     def load(self, *args, **kwargs):
-        self.persistent_load = self.forced_persistent_load
         retval = super().load(*args, **kwargs)
         self.lazy_loaded_storages = {}
         return retval
@@ -213,16 +262,33 @@ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, miss
             unexpected_keys.append(key)


+@contextlib.contextmanager
+def use_custom_unpickler(unpickler: Type[pickle.Unpickler] = RestrictedUnpickler):
+    try:
+        old_unpickler = pickle.Unpickler
+        pickle.Unpickler = unpickler
+
+        old_pickle_load = pickle.load
+
+        def new_pickle_load(*args, **kwargs):
+            return pickle.Unpickler(*args, **kwargs).load()
+
+        pickle.load = new_pickle_load
+
+        yield
+
+    finally:
+        pickle.Unpickler = old_unpickler
+        pickle.load = old_pickle_load
+
 @contextlib.contextmanager
 def use_lazy_torch_load(enable=True, callback: Optional[Callable] = None, dematerialized_modules=False, use_accelerate_init_empty_weights=False):
     if not enable:
-        yield False
+        with use_custom_unpickler(RestrictedUnpickler):
+            yield False
         return

     try:
         old_unpickler = pickle.Unpickler
         pickle.Unpickler = _LazyUnpickler

         old_rebuild_tensor = torch._utils._rebuild_tensor
         torch._utils._rebuild_tensor = _rebuild_tensor
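use_custom_unpickler swaps pickle.Unpickler and pickle.load module-wide, so libraries that pickle internally are covered too; torch.load is the case that matters here, and the read_neox_checkpoint hunk further down uses exactly this pattern. A usage sketch (the "model.bin" path is a placeholder):

import torch
import torch_lazy_loader

with torch_lazy_loader.use_custom_unpickler(torch_lazy_loader.RestrictedUnpickler):
    # torch.load drives pickle internally, so the malicious-pickle check
    # applies even though this code never touches pickle directly.
    state_dict = torch.load("model.bin", map_location="cpu")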
@@ -261,10 +327,10 @@ def use_lazy_torch_load(enable=True, callback: Optional[Callable] = None, demate
         old_load_from_state_dict = torch.nn.Module._load_from_state_dict
         torch.nn.Module._load_from_state_dict = _load_from_state_dict

-        yield True
+        with use_custom_unpickler(_LazyUnpickler):
+            yield True

     finally:
         pickle.Unpickler = old_unpickler
         torch._utils._rebuild_tensor = old_rebuild_tensor
         torch.load = old_torch_load
         if dematerialized_modules:
@@ -55,7 +55,7 @@ from mesh_transformer.util import to_bf16

 params: Dict[str, Any] = {}

-__seed = random.randrange(sys.maxsize)
+__seed = random.randrange(2**64)
 rng = random.Random(__seed)

@@ -69,8 +69,17 @@ def set_rng_seed(seed: int):
     return seed

 def randomize_rng_seed():
-    return set_rng_seed(random.randrange(sys.maxsize))
+    return set_rng_seed(random.randrange(2**64))
+
+def get_rng_state():
+    return rng
+
+def set_rng_state(state):
+    global rng
+    rng = state
+
+def new_rng_state(seed: int):
+    return random.Random(seed)

 def warper_callback(logits) -> np.array:
     raise NotImplementedError("`tpu_mtj_backend.warper_callback()` needs to be defined")
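sys.maxsize is platform-dependent (2**63 - 1 on 64-bit CPython), while random.randrange(2**64) draws from the same full 64-bit space everywhere. Note that the new helpers pass whole random.Random objects around; the sketch below shows the equivalent snapshot-and-restore idea with getstate/setstate:

import random

rng = random.Random(random.randrange(2**64))  # seed from the full 64-bit range

saved = rng.getstate()   # snapshot the generator
a = rng.random()
rng.setstate(saved)      # roll the generator back
b = rng.random()
assert a == b            # the restored generator repeats the draw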
@@ -946,6 +955,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):

     import torch
     import torch.utils.dlpack
+    import torch_lazy_loader
     from tqdm.auto import tqdm

     move_xmap = jax.experimental.maps.xmap(
@@ -987,8 +997,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
             continue
         layer = checkpoint_layer - 2
         shards = []
-        for checkpoint_shard in range(checkpoint_shards):
-            shards.append(torch.load(path_template.format(layer=checkpoint_layer, shard=checkpoint_shard), map_location="cpu"))
+        with torch_lazy_loader.use_custom_unpickler(torch_lazy_loader.RestrictedUnpickler):
+            for checkpoint_shard in range(checkpoint_shards):
+                shards.append(torch.load(path_template.format(layer=checkpoint_layer, shard=checkpoint_shard), map_location="cpu"))
         for key in shards[0]:
             if key == "attention.rotary_emb.inv_freq":
                 continue
@@ -1038,7 +1049,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
         raise RuntimeError(error)


-def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpoint=False, **kwargs) -> None:
+def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None:
     global thread_resources_env, seq, tokenizer, network, params, pad_token_id

     if "pad_token_id" in kwargs:
@@ -1137,6 +1148,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
         if param not in params:
             params[param] = default_params[param]

+    # Use an optimization that will allow us to avoid one extra transpose operation
+    if hf_checkpoint:
+        params["transposed_linear"] = True
+
     # Load tokenizer
     if vars.model == "TPUMeshTransformerGPTNeoX":
         tokenizer = Tokenizer.from_file(os.path.join(path, "20B_tokenizer.json"))
@@ -1180,10 +1195,6 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     thread_resources_env = maps.ResourceEnv(maps.Mesh(devices, ('dp', 'mp')), ())
     maps.thread_resources.env = thread_resources_env

-    global shard_xmap, batch_xmap
-    shard_xmap = __shard_xmap()
-    batch_xmap = __batch_xmap(shard_dim=cores_per_replica)
-
     global badwords
     # These are the tokens that we don't want the AI to ever write
     badwords = jnp.array(vars.badwordsids).squeeze()
@@ -1229,6 +1240,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     from tqdm.auto import tqdm
+    import functools


     def callback(model_dict, f, **_):
         if callback.nested:
             return
@@ -1236,6 +1248,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
         with zipfile.ZipFile(f, "r") as z:
             try:
                 last_storage_key = None
+                zipfolder = os.path.basename(os.path.normpath(f)).split('.')[0]
                 f = None
                 current_offset = 0
                 if utils.current_shard == 0:
@@ -1268,7 +1281,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                         last_storage_key = storage_key
                         if isinstance(f, zipfile.ZipExtFile):
                             f.close()
-                        f = z.open(f"archive/data/{storage_key}")
+                        try:
+                            f = z.open(f"archive/data/{storage_key}")
+                        except:
+                            f = z.open(f"{zipfolder}/data/{storage_key}")
                         current_offset = 0
                     if current_offset != model_dict[key].seek_offset:
                         f.read(model_dict[key].seek_offset - current_offset)
@@ -1293,23 +1309,25 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                     if "divide_by_shards" in transforms:
                         tensor /= params["cores_per_replica"]
                     if "vocab_pad" in transforms:
-                        tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"]))
-                    if "no_transpose" not in transforms and tensor.ndim == 2:
-                        tensor = tensor.T
+                        tensor = torch.nn.functional.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],))
+                    # We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config
+                    #if "no_transpose" not in transforms and tensor.ndim == 2:
+                    #    tensor = tensor.T
                     tensor.unsqueeze_(0)
-                    if tensor.dtype is torch.float16 or tensor.dtype is torch.float32:
-                        tensor = tensor.bfloat16()

+                    # Shard the tensor so that parts of the tensor can be used
+                    # on different TPU cores
+                    tensor = reshard_reverse(
+                        tensor,
+                        params["cores_per_replica"],
+                        network.state["params"][spec["module"]][spec["param"]].shape,
+                    )
+                    tensor = jnp.array(tensor.detach())
+                    if tensor.dtype is torch.float16 or tensor.dtype is torch.float32:
+                        tensor = tensor.bfloat16()
                     network.state["params"][spec["module"]][spec["param"]] = move_xmap(
-                        jax.dlpack.from_dlpack(torch.utils.dlpack.to_dlpack(
-                            reshard_reverse(
-                                tensor,
-                                params["cores_per_replica"],
-                                network.state["params"][spec["module"]][spec["param"]].shape,
-                            )
-                        )).copy(),
+                        tensor,
                         np.empty(params["cores_per_replica"]),
                     )
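torch.nn.functional.pad reads its pad tuple in pairs starting from the last dimension, so (0,) * (tensor.ndim * 2 - 1) + (pad,) always pads the trailing edge of dimension 0. That is what lets the same vocab_pad transform now handle 1-D biases as well as 2-D weights, matching the maps JSON change earlier. A small sketch (pad = 3 stands in for params["n_vocab_padding"]):

import torch
import torch.nn.functional as F

pad = 3  # stand-in for params["n_vocab_padding"]

weight = torch.zeros(11, 8)  # 2-D weight: (vocab, hidden)
bias = torch.zeros(11)       # 1-D bias: (vocab,)

for t in (weight, bias):
    padded = F.pad(t, (0,) * (t.ndim * 2 - 1) + (pad,))
    assert padded.shape[0] == t.shape[0] + pad  # only the vocab dim grows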
@@ -1396,3 +1414,6 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
         model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")

     #network.state = network.move_xmap(network.state, np.zeros(cores_per_replica))
+    global shard_xmap, batch_xmap
+    shard_xmap = __shard_xmap()
+    batch_xmap = __batch_xmap(shard_dim=cores_per_replica)
@@ -1,5 +1,7 @@
 @echo off
+cd /d %~dp0
+SET CONDA_SHLVL=

 TITLE KoboldAI - Updater
 SET /P M=<loader.settings
 IF %M%==1 GOTO drivemap
@@ -500,6 +500,7 @@
 <li>kwargs? (<code>table<string, any></code>): Table of optional keyword arguments from the following list. Defaults to <code>{}</code>.
 <ul>
 <li>scan_story? (<code>boolean</code>): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to <code>false</code>, only the <code>submission</code> is scanned for world info keys. Defaults to <code>true</code>.</li>
+<li>include_anote? (<code>boolean</code>): Whether to include the author's note in the story. Defaults to <code>true</code>, pass <code>false</code> to suppress including the author's note.</li>
 </ul>
 </li>
 </ul>
@@ -574,6 +575,7 @@
 <li>kwargs? (<code>table<string, any></code>): Table of optional keyword arguments from the following list. Defaults to <code>{}</code>.
 <ul>
 <li>scan_story? (<code>boolean</code>): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to <code>false</code>, only the <code>submission</code> is scanned for world info keys. Defaults to <code>true</code>.</li>
+<li>include_anote? (<code>boolean</code>): Whether to include the author's note in the story. Defaults to <code>true</code>, pass <code>false</code> to suppress including the author's note.</li>
 </ul>
 </li>
 </ul>
@@ -687,6 +689,7 @@
 <li>kwargs? (<code>table<string, any></code>): Table of optional keyword arguments from the following list. Defaults to <code>{}</code>.
 <ul>
 <li>scan_story? (<code>boolean</code>): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to <code>false</code>, only the <code>submission</code> is scanned for world info keys. Defaults to <code>true</code>.</li>
+<li>include_anote? (<code>boolean</code>): Whether to include the author's note in the story. Defaults to <code>true</code>, pass <code>false</code> to suppress including the author's note.</li>
 </ul>
 </li>
 </ul>
@@ -538,6 +538,7 @@ Computes the context that would be sent to the generator with the user's current
 * entries? (`KoboldWorldInfoEntry|table<any, KoboldWorldInfoEntry>`): A `KoboldWorldInfoEntry` or table thereof that indicates an allowed subset of world info entries to include in the context. Defaults to all world info entries.
 * kwargs? (`table<string, any>`): Table of optional keyword arguments from the following list. Defaults to `{}`.
   * scan_story? (`boolean`): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to `false`, only the `submission` is scanned for world info keys. Defaults to `true`.
+  * include_anote? (`boolean`): Whether to include the author's note in the story. Defaults to `true`, pass `false` to suppress including the author's note.

 ### Returns

@@ -636,6 +637,7 @@ The same as calling `kobold.worldinfo:compute_context()` with this world info en
 * submission (`string`): String to use as simulated user's input after being formatted by input formatting.
 * kwargs? (`table<string, any>`): Table of optional keyword arguments from the following list. Defaults to `{}`.
   * scan_story? (`boolean`): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to `false`, only the `submission` is scanned for world info keys. Defaults to `true`.
+  * include_anote? (`boolean`): Whether to include the author's note in the story. Defaults to `true`, pass `false` to suppress including the author's note.

 ### Returns

@@ -819,6 +821,7 @@ Unlike `kobold.worldinfo:compute_context()`, this function doesn't include world
 * entries? (`KoboldWorldInfoEntry|table<any, KoboldWorldInfoEntry>`): A `KoboldWorldInfoEntry` or table thereof that indicates an allowed subset of world info entries to include in the context. Entries that are not inside of the folder are still not included. Defaults to all world info entries in the folder.
 * kwargs? (`table<string, any>`): Table of optional keyword arguments from the following list. Defaults to `{}`.
   * scan_story? (`boolean`): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to `false`, only the `submission` is scanned for world info keys. Defaults to `true`.
+  * include_anote? (`boolean`): Whether to include the author's note in the story. Defaults to `true`, pass `false` to suppress including the author's note.

 ### Returns
utils.py
@@ -27,6 +27,7 @@ except ImportError:
     HAS_ACCELERATE = False

 vars = None
+args = None
 num_shards: Optional[int] = None
 current_shard = 0
 from_pretrained_model_name = ""
@@ -40,6 +41,8 @@ named_buffers: Optional[List[tuple]] = None

 default_sampler_order = [6, 0, 1, 2, 3, 4, 5]

+emit = None
+
 #==================================================================#
 #  Decorator to prevent a function's actions from being run until
 #  at least x seconds have passed without the function being called
@@ -198,6 +201,7 @@ def _download_with_aria2(aria2_config: str, total_length: int, directory: str =
         pass

     import transformers
+    aria2_port = 6799 if vars is None else vars.aria2_port
     lengths = {}
     s = requests.Session()
     s.mount("http://", requests.adapters.HTTPAdapter(max_retries=requests.adapters.Retry(total=120, backoff_factor=1)))
@@ -208,9 +212,9 @@ def _download_with_aria2(aria2_config: str, total_length: int, directory: str =
     with tempfile.NamedTemporaryFile("w+b", delete=False) as f:
         f.write(aria2_config)
         f.flush()
-        p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--enable-rpc=true", f"--rpc-secret={secret}", "--rpc-listen-port", str(vars.aria2_port), "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming=false", "-d", directory, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {use_auth_token}'"] if use_auth_token else []), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--enable-rpc=true", f"--rpc-secret={secret}", "--rpc-listen-port", str(aria2_port), "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming=false", "-d", directory, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {use_auth_token}'"] if use_auth_token else []), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         while p.poll() is None:
-            r = s.post(f"http://localhost:{vars.aria2_port}/jsonrpc", json={"jsonrpc": "2.0", "id": "kai", "method": "aria2.tellActive", "params": [f"token:{secret}"]}).json()["result"]
+            r = s.post(f"http://localhost:{aria2_port}/jsonrpc", json={"jsonrpc": "2.0", "id": "kai", "method": "aria2.tellActive", "params": [f"token:{secret}"]}).json()["result"]
             if not r:
                 s.close()
                 if bar is not None:
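The aria2_port fallback lets the downloader run even when utils.vars was never assigned. For reference, a minimal sketch of the JSON-RPC poll the loop performs (assumes a local aria2c --enable-rpc instance; "sekret" is a placeholder RPC secret):

import requests

aria2_port = 6799
secret = "sekret"

r = requests.post(
    f"http://localhost:{aria2_port}/jsonrpc",
    json={"jsonrpc": "2.0", "id": "kai",
          "method": "aria2.tellActive", "params": [f"token:{secret}"]},
).json()["result"]
# Each entry reports progress for an active download; an empty list
# means aria2 has nothing left in flight.
print(len(r), "active downloads")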
@@ -257,7 +261,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
         if token is None:
             raise EnvironmentError("You specified use_auth_token=True, but a huggingface token was not found.")
     _cache_dir = str(cache_dir) if cache_dir is not None else transformers.TRANSFORMERS_CACHE
-    _revision = revision if revision is not None else huggingface_hub.constants.DEFAULT_REVISION
+    _revision = args.revision if args.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
     sharded = False
     headers = {"user-agent": transformers.file_utils.http_user_agent(user_agent)}
     if use_auth_token:
@@ -268,7 +272,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa

     def is_cached(filename):
         try:
-            huggingface_hub.hf_hub_download(pretrained_model_name_or_path, filename, cache_dir=cache_dir, local_files_only=True)
+            huggingface_hub.hf_hub_download(pretrained_model_name_or_path, filename, cache_dir=cache_dir, local_files_only=True, revision=_revision)
         except ValueError:
             return False
         return True
@@ -277,7 +281,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
             filename = transformers.modeling_utils.WEIGHTS_INDEX_NAME if sharded else transformers.modeling_utils.WEIGHTS_NAME
         except AttributeError:
             return
-        url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=revision)
+        url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=_revision)
         if is_cached(filename) or requests.head(url, allow_redirects=True, proxies=proxies, headers=headers):
             break
     if sharded:
@@ -291,7 +295,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
         with open(map_filename) as f:
             map_data = json.load(f)
         filenames = set(map_data["weight_map"].values())
-        urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=revision) for n in filenames]
+        urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=_revision) for n in filenames]
         if not force_download:
             urls = [u for u, n in zip(urls, filenames) if not is_cached(n)]
             if not urls:
@@ -456,6 +460,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
     import transformers
     import transformers.modeling_utils
     from huggingface_hub import HfFolder
+    _revision = args.revision if args.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
     if shutil.which("aria2c") is None:  # Don't do anything if aria2 is not installed
         return
     if local_files_only:  # If local_files_only is true, we obviously don't need to download anything
@@ -490,7 +495,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
             filename = transformers.modeling_utils.WEIGHTS_INDEX_NAME if sharded else transformers.modeling_utils.WEIGHTS_NAME
         except AttributeError:
             return
-        url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=revision)
+        url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=_revision)
         if is_cached(url) or requests.head(url, allow_redirects=True, proxies=proxies, headers=headers):
             break
     if sharded:
@@ -504,7 +509,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
         with open(map_filename) as f:
             map_data = json.load(f)
         filenames = set(map_data["weight_map"].values())
-        urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=revision) for n in filenames]
+        urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=_revision) for n in filenames]
         if not force_download:
             urls = [u for u in urls if not is_cached(u)]
             if not urls:
@@ -551,7 +556,8 @@ def get_num_shards(filename):
 def get_sharded_checkpoint_num_tensors(pretrained_model_name_or_path, filename, cache_dir=None, force_download=False, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, **kwargs):
     import transformers.modeling_utils
     import torch
-    shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=revision)
+    _revision = args.revision if args.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
+    shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=_revision)
     return list(itertools.chain(*(torch.load(p, map_location="cpu").keys() for p in shard_paths)))

 #==================================================================#
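All of these hunks follow one pattern: resolve the revision once from the command-line arguments, then thread the same _revision into every hf_hub_url / hf_hub_download / get_checkpoint_shard_files call, so cache checks and downloads can no longer disagree about which branch of a model repo they refer to. A sketch of the pattern in isolation (the args class below is a stand-in for the parsed CLI namespace, not the repo's object):

import huggingface_hub

class args:  # stand-in for the parsed CLI namespace
    revision = None  # e.g. "float16" to pull a repo's float16 branch

_revision = args.revision if args.revision is not None else huggingface_hub.constants.DEFAULT_REVISION

url = huggingface_hub.hf_hub_url("EleutherAI/gpt-neo-2.7B", "config.json", revision=_revision)
print(url)  # resolves against "main" when no revision override is given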
@@ -602,4 +608,4 @@ def get_missing_module_names(model: PreTrainedModel, names: List[str]) -> List[s
         else:
             recurse(c[1], head=name + ".")
     recurse(model)
-    return missing_names
+    return missing_names