Added new function text2img_api()

- Sends the prompt to a locally hosted stable-diffusion-webui API and retrieves
  the image result. The generation info returned by the SD API is embedded in the
  PNG file as it is saved. Image files are named with the date and time of
  generation and saved in /stories/art/.

Modified UI_2_generate_image() to call text2img_api() instead of text2img_horde()
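
Roughly, the new call path boils down to the sketch below. This is a minimal
sketch, not the exact implementation: it assumes a stable-diffusion-webui
instance listening on 127.0.0.1:7860 with its API enabled, the prompt and
settings are placeholders, and the field names follow the webui
/sdapi/v1/txt2img payload.

    # Minimal sketch of what text2img_api() does (assumptions noted above).
    import base64, datetime
    import requests
    from io import BytesIO
    from PIL import Image, PngImagePlugin

    payload = {"prompt": "a lighthouse at dusk, fantasy illustration",
               "steps": 50, "cfg_scale": 14, "width": 512, "height": 512}
    resp = requests.post("http://127.0.0.1:7860/sdapi/v1/txt2img", json=payload)
    result = resp.json()                    # {"images": [...], "info": "...", ...}

    b64_png = result["images"][0].split(",", 1)[-1]   # drop any data-URI prefix
    img = Image.open(BytesIO(base64.b64decode(b64_png)))
    pnginfo = PngImagePlugin.PngInfo()
    pnginfo.add_text("parameters", str(result["info"]))   # embed the SD API info
    stamp = datetime.datetime.now().strftime("%H%M%S%d%m%Y")
    img.save("stories/art/{}_{}".format(stamp, "story_art.png"), pnginfo=pnginfo)

The actual function additionally re-encodes the image to WebP for the base64
string handed back to the UI; see the diff below.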


Signed-off-by: Robert Vining <offers@robertrvining.com>
Author: Robert Vining
Date: 2022-11-01 18:01:20 +10:00
parent 07eb1b5a26
commit 107bbfa291


@@ -81,7 +81,7 @@ import transformers.generation_utils
 # Text2img
 import base64
-from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageOps
+from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageOps, PngImagePlugin
 from io import BytesIO
 global tpu_mtj_backend
@@ -9098,7 +9098,7 @@ def UI_2_generate_image(data):
     #If we don't have a GPU, use horde if we're allowed to
     start_time = time.time()
     if ((not koboldai_vars.hascuda or not os.path.exists("models/stable-diffusion-v1-4")) and koboldai_vars.img_gen_priority != 0) or koboldai_vars.img_gen_priority == 3:
-        b64_data = text2img_horde(", ".join(keys), art_guide = art_guide)
+        b64_data = text2img_api(", ".join(keys), art_guide = art_guide)
     else:
         import psutil
         #We aren't being forced to use horde, so now let's figure out if we should use local
@@ -9108,9 +9108,9 @@ def UI_2_generate_image(data):
         elif torch.cuda.get_device_properties(0).total_memory > 6000000000 and koboldai_vars.img_gen_priority <= 1:
             #We could do it locally by swapping the model out
             print("Could do local or online")
-            b64_data = text2img_horde(", ".join(keys), art_guide = art_guide)
+            b64_data = text2img_api(", ".join(keys), art_guide = art_guide)
         elif koboldai_vars.img_gen_priority != 0:
-            b64_data = text2img_horde(", ".join(keys), art_guide = art_guide)
+            b64_data = text2img_api(", ".join(keys), art_guide = art_guide)
     logger.debug("Time to Generate Image {}".format(time.time()-start_time))
     koboldai_vars.picture = b64_data
     koboldai_vars.picture_prompt = ", ".join(keys)
@@ -9136,7 +9136,7 @@ def text2img_local(prompt, art_guide="", filename="new.png"):
     def get_image(pipe, prompt, num_inference_steps):
         from torch import autocast
         with autocast("cuda"):
-            return pipe(prompt, num_inference_steps=num_inference_steps)["sample"][0]
+            return pipe(prompt, num_inference_steps=num_inference_steps).images[0]
     image = tpool.execute(get_image, pipe, prompt, num_inference_steps=35)
     buffered = BytesIO()
     image.save(buffered, format="JPEG")
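
The one-line change in text2img_local() above tracks an API change in Hugging
Face diffusers: the pipeline call now returns an output object whose images
attribute holds the PIL images, replacing the old ["sample"] key. A minimal
sketch, assuming a recent diffusers release, a CUDA GPU, and the local
checkpoint path used elsewhere in this file (model path and prompt are
placeholders):

    # Sketch only; not part of the commit.
    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained("models/stable-diffusion-v1-4").to("cuda")
    with torch.autocast("cuda"):
        out = pipe("a castle at sunset", num_inference_steps=35)
    image = out.images[0]   # PIL image; older releases exposed out["sample"][0]
    image.save("preview.jpg", format="JPEG")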
@@ -9195,6 +9195,54 @@ def text2img_horde(prompt,
         koboldai_vars.generating_image = False
         logger.error(submit_req.text)
+#@logger.catch
+def text2img_api(prompt,
+                 art_guide = 'fantasy illustration, artstation, by Hugin Miyama by Taiki Kawakami, cinematic lighting',
+                 filename = "story_art.png"):
+    logger.debug("Generating Image using external API")
+    koboldai_vars.generating_image = True
+    final_imgen_params = {
+        "prompt": "{}, {}".format(prompt, art_guide),
+        "n": 1,
+        "width": 512,
+        "height": 512,
+        "steps": 50,
+        "cfg_scale": 14,
+        "negative_prompt": " lowres, bad anatomy, bad hands out of frame, two heads, totem pole, several faces, extra fingers, mutated hands, (poorly drawn hands:1.21), (poorly drawn face:1.21), (mutation:1.331), (deformed:1.331), (ugly:1.21), blurry, (bad anatomy:1.21), (bad proportions:1.331), (extra limbs:1.21), glitchy, ((clip through table)), adherent bodies, slimy bodies, (badly visible legs), captions, words",
+    }
+    final_submit_dict = {
+        "prompt": "{}, {}".format(prompt, art_guide),
+        "params": final_imgen_params,
+    }
+    apiaddress = 'http://127.0.0.1:7860/sdapi/v1/txt2img'
+    payload_json = json.dumps(final_imgen_params)
+    logger.debug(final_submit_dict)
+    submit_req = requests.post(url=f'{apiaddress}', data=payload_json)
+    if submit_req.ok:
+        results = submit_req.json()
+        for i in results['images']:
+            final_src_img = Image.open(BytesIO(base64.b64decode(i.split(",",1)[-1])))
+            buffer = BytesIO()
+            final_src_img.save(buffer, format="Webp", quality=95)
+            b64img = base64.b64encode(buffer.getvalue()).decode("utf8")
+            base64_bytes = b64img.encode('utf-8')
+            img_bytes = base64.b64decode(base64_bytes)
+            img = Image.open(BytesIO(img_bytes))
+        #dtnow = datetime.datetime.now()
+        #dt_string = dtnow().strftime("%H%M%S%d%m%Y")
+        dt_string = datetime.datetime.now().strftime("%H%M%S%d%m%Y")
+        final_filename = "stories/art/{}_{}".format(dt_string,filename)
+        pnginfo = PngImagePlugin.PngInfo()
+        pnginfo.add_text("parameters", str(results['info']))
+        img.save(final_filename, pnginfo=pnginfo)
+        logger.debug("Saved Image")
+        koboldai_vars.generating_image = False
+        return(b64img)
+    else:
+        koboldai_vars.generating_image = False
+        logger.error(submit_req.text)
 #@logger.catch
 def get_items_locations_from_text(text):
     # load model and tokenizer
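
Because text2img_api() writes the generation info into the PNG's "parameters"
text chunk, it can be read back later with Pillow. A minimal sketch (the
filename is a placeholder for whatever timestamped file was written to
stories/art/):

    from PIL import Image

    img = Image.open("stories/art/123456011122022_story_art.png")  # placeholder name
    print(img.info.get("parameters"))   # PNG text chunks are exposed via Image.info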