diff --git a/app.py b/app.py index aacccc4..2ae5850 100644 --- a/app.py +++ b/app.py @@ -60,16 +60,17 @@ def image_generation_process( warmup=10, acceleration="tensorrt", use_lcm_lora=False, - mode="txt2img", + mode="img2img", cfg_type="none", use_denoising_batch=True, + output_type="pil", ) start_prompt = "A glowing, vintage phone booth standing in surreal landscapes across different scene" # Prepare the stream stream.prepare( prompt=start_prompt, - num_inference_steps=4, + num_inference_steps=50, ) # Prepare image @@ -92,6 +93,7 @@ def image_generation_process( input_image= input_queue.get(block=True) + # input_image = stream.preprocess_image('input.png') # Check if a new prompt is available in the prompt_queue @@ -106,9 +108,9 @@ def image_generation_process( - preprocessed_image =stream.preprocess_image(x_output) + # preprocessed_image =stream.postprocess_image(x_output) - queue.put(preprocessed_image, block=False) + queue.put(x_output, block=False) # queue.put(preprocessed_image, block=False) @@ -171,13 +173,13 @@ def main()-> None: # process_show=ctx.Process(target=receive_images, args=(queue, fps_queue)) # process_show.start() - print("Starting spout output process") + # print("Starting spout output process") process_spout_out=ctx.Process(target=send_spout_image, args=(queue, 512, 512)) process_spout_out.start() process_gen.join() - process_spout_in.join() + # process_spout_in.join() process_spout_out.join() process_osc.join() @@ -186,7 +188,7 @@ def main()-> None: print("Process interrupted") process_gen.terminate() - process_spout_in.terminate() + # process_spout_in.terminate() process_spout_out.terminate() process_osc.terminate() diff --git a/img2img.py b/img2img.py new file mode 100644 index 0000000..5adaf98 --- /dev/null +++ b/img2img.py @@ -0,0 +1,112 @@ +import sys +import os + +sys.path.append( + os.path.join( + os.path.dirname(__file__), + "..", + "..", + ) +) + +from utils.wrapper import StreamDiffusionWrapper + +import torch + +# from config import Args +from pydantic import BaseModel, Field +from PIL import Image +import math + +# base_model = "stabilityai/sd-turbo" +# taesd_model = "madebyollin/taesd" +base_model = "./models/sd-turbo" +taesd_model = "./models/taesd" + +default_prompt = "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece" +default_negative_prompt = "black and white, blurry, low resolution, pixelated, pixel art, low quality, low fidelity" + +page_content = """

StreamDiffusion

+

Image-to-Image SD-Turbo

+

+ This demo showcases + StreamDiffusion + +Image to Image pipeline using + SD-Turbo with a MJPEG stream server. +

+""" + + +class Pipeline: + class Info(BaseModel): + name: str = "StreamDiffusion img2img" + input_mode: str = "image" + page_content: str = page_content + + class InputParams(BaseModel): + prompt: str = Field( + default_prompt, + title="Prompt", + field="textarea", + id="prompt", + ) + # negative_prompt: str = Field( + # default_negative_prompt, + # title="Negative Prompt", + # field="textarea", + # id="negative_prompt", + # ) + width: int = Field( + 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width" + ) + height: int = Field( + 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height" + ) + + def __init__(self, device: torch.device, torch_dtype: torch.dtype): + params = self.InputParams() + self.stream = StreamDiffusionWrapper( + model_id_or_path=base_model, + use_tiny_vae=True, + device=device, + dtype=torch_dtype, + t_index_list=[35, 45], + frame_buffer_size=1, + width=params.width, + height=params.height, + use_lcm_lora=False, + output_type="pil", + warmup=10, + vae_id=taesd_model, + acceleration="xformers", + mode="img2img", + use_denoising_batch=True, + cfg_type="none", + # use_safety_checker=args.safety_checker, + enable_similar_image_filter=True, + similar_image_filter_threshold=0.98, + # engine_dir=args.engine_dir, + ) + + self.last_prompt = default_prompt + self.stream.prepare( + prompt=default_prompt, + negative_prompt=default_negative_prompt, + num_inference_steps=50, + guidance_scale=1.2, + ) + + def predict(self, image: Image.Image, params: "Pipeline.InputParams") -> Image.Image: + image_tensor = self.stream.preprocess_image(image) + # output_image = self.stream(image=image_tensor, prompt=params.prompt) + output_image = self.stream(image=image_tensor, prompt=params.prompt) + + return output_image \ No newline at end of file diff --git a/input.png b/input.png new file mode 100644 index 0000000..3d6f015 Binary files /dev/null and b/input.png differ diff --git a/main.py b/main.py new file mode 100644 index 0000000..627a68d --- /dev/null +++ b/main.py @@ -0,0 +1,120 @@ +from fastapi import FastAPI +from pydantic import BaseModel +import datetime +import torch +from PIL import Image +import numpy as np +import SpoutGL +from OpenGL.GL import GL_RGBA +import time +import img2img + +def main(): + TARGET_FPS = 60 + SPOUT_RECEIVER_NAME = "Spout DX11 Sender" + SPOUT_SENDER_NAME = "Output - StreamDiffusion" + WIDTH = 512 + HEIGHT = 512 + PROMPT = "a beautiful landscape painting, trending on artstation, 8k, hyperrealistic" + timestamp = datetime.datetime.now() + fps = 30.0 + + print("Initializing StreamDiffusion pipeline...") + global pipeline + try: + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + torch_dtype = torch.float16 + pipeline = img2img.Pipeline(device, torch_dtype) + + app = FastAPI() + + @app.get("/health") + def read_root(): + return {"status": "ok"} + + class PromptUpdate(BaseModel): + prompt: str + + @app.post("/api/update/prompt") + async def update_prompt(update: PromptUpdate): + global PROMPT + PROMPT = update.prompt + print(f"Prompt updated to: {PROMPT}") + return {"message": "Prompt updated successfully", "new_prompt": PROMPT} + + print("Pipeline initialized.") + except Exception as e: + print(f"Error initializing StreamDiffusion pipeline: {e}") + return + + print(f"Initializing Spout receiver for '{SPOUT_RECEIVER_NAME}'...") + spout_receiver = SpoutGL.SpoutReceiver() + spout_receiver.setReceiverName(SPOUT_RECEIVER_NAME) + + print(f"Initializing Spout sender as '{SPOUT_SENDER_NAME}'...") + spout_sender = SpoutGL.SpoutSender() + spout_sender.setSenderName(SPOUT_SENDER_NAME) + + image_bgra = np.zeros((HEIGHT, WIDTH, 4), dtype=np.uint8) + + import uvicorn + import threading + config = uvicorn.Config(app, host="0.0.0.0", port=34800, log_level="info") + server = uvicorn.Server(config) + threading.Thread(target=server.run, daemon=True).start() + print("FastAPI server started at http://0.0.0.0:34800") + + try: + print("Starting main loop. Press Ctrl+C to exit.") + while True: + received = spout_receiver.receiveImage(image_bgra, GL_RGBA, False, 0) + # print(f"Received: {received}, Connected: {spout_receiver.isConnected()}, Updated: {spout_receiver.isUpdated()}, Empty: {SpoutGL.helpers.isBufferEmpty(image_bgra)}") + + if received: + if spout_receiver.isUpdated(): + continue + + if spout_receiver.isConnected() and SpoutGL.helpers.isBufferEmpty(image_bgra): + continue + + image_rgb_array = image_bgra[:, :, [2,1,0]] + input_image = Image.fromarray(image_rgb_array, 'RGB') + # input_image.save("debug_input.png") + + + params = img2img.Pipeline.InputParams(prompt=PROMPT) + output_image = pipeline.predict(image=input_image, params=params) + # output_image.save("debug_output.png") + + # output_rgba_array = np.array(output_image.convert("RGBA")) + # output_bgra_array = output_rgba_array[:, :, [2, 1, 0, 3]] + # buffer = np.ascontiguousarray(output_bgra_array) + output_bgr_array = np.array(output_image, dtype=np.uint8)[:, :, ::-1] + output_bgra_array = np.zeros((HEIGHT, WIDTH, 4), dtype=np.uint8) + output_bgra_array[:, :, :3] = output_bgr_array + output_bgra_array[:, :, 3] = 255 + buffer = output_bgra_array + + spout_sender.sendImage(buffer, WIDTH, HEIGHT, GL_RGBA, False, 0) + + # timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + dt = (datetime.datetime.now() - timestamp).total_seconds() + t = 0.05 + fps = fps * t + 1 / dt * (1 - t) + timestamp = datetime.datetime.now() + + print("\033[92m[ STREAM DIFFUSION ]\033[0m " + f"Frame processed and sent to Spout: {fps:2f}", end="\r", flush=True) + else: + time.sleep(1. / TARGET_FPS) + + except KeyboardInterrupt: + print("\nExiting...") + finally: + print("Releasing Spout resources.") + spout_receiver.releaseReceiver() + spout_sender.releaseSender() + + +if __name__ == "__main__": + main() + diff --git a/requirements.txt b/requirements.txt index f6a6498..edc7a75 100644 --- a/requirements.txt +++ b/requirements.txt @@ -45,8 +45,8 @@ streamdiffusion @ git+https://github.com/cumulo-autumn/StreamDiffusion.git@b6232 sympy==1.13.3 termcolor==3.1.0 tokenizers==0.15.2 -torch==2.1.0+cu121 -torchvision==0.16.0+cu121 +torch==2.1.0 +torchvision==0.16.0 tqdm==4.67.1 transformers==4.35.2 twython==3.9.1 diff --git a/spout_util.py b/spout_util.py index fc4b15d..a0aa628 100644 --- a/spout_util.py +++ b/spout_util.py @@ -9,55 +9,68 @@ from OpenGL import GL from multiprocessing import Queue import numpy as np - +from PIL import Image TARGET_FPS = 30 SEND_WIDTH = 512 SEND_HEIGHT = 512 - +alpha_cache = np.full((512, 512, 1), 255, dtype=np.uint8) def spout_buffer_to_tensor(buffer, width, height): - np_buffer = np.asarray(buffer, dtype=np.uint8) + # np_buffer = np.asarray(buffer, dtype=np.uint8) + np_buffer=np.frombuffer(buffer, dtype=np.uint8) image_bgra = np_buffer.reshape((height, width, 4)) image_rgb = image_bgra[..., [2, 1, 0]] image_float = image_rgb.astype(np.float32) / 255.0 - # image_normalized = (image_float * 2.0) - 1.0 + # image_normalized = (image_float * 2.0) - 1.0 tensor = torch.from_numpy(image_float).permute(2, 0, 1) + del np_buffer # Free memory + del image_bgra # Free memory + del image_rgb # Free memory + del image_float # Free memory + + return tensor.unsqueeze(0) def get_spout_image(queue, wwidth: int, wheight: int) -> None: with SpoutGL.SpoutReceiver() as receiver: receiver.setReceiverName("Spout DX11 Sender") - - buffer = None + image_bgra = np.zeros((SEND_HEIGHT, SEND_WIDTH, 4), dtype=np.uint8) while True: - result = receiver.receiveImage(buffer, GL.GL_RGBA, False, 0) + result = receiver.receiveImage(image_bgra, GL.GL_RGBA, False, 0) # print("Receive result", result) if receiver.isUpdated(): - width = receiver.getSenderWidth() - height = receiver.getSenderHeight() - buffer = array.array('B', [0] * (width * height * 4)) # Correctly reallocate buffer with updated size - print("Spout Receiver updated, Buffer size", width, height) - - if buffer and result and not SpoutGL.helpers.isBufferEmpty(buffer): - pixels=spout_buffer_to_tensor(buffer, width, height) + continue + # width = receiver.getSenderWidth() + # height = receiver.getSenderHeight() + # image_bgra = array.array('B', [0] * (width * height * 4)) # Correctly reallocate buffer with updated size + # print("Spout Receiver updated, Buffer size", width, height) + + # if buffer and result and not SpoutGL.helpers.isBufferEmpty(buffer): + if SpoutGL.helpers.isBufferEmpty(image_bgra): + continue + # pixels=spout_buffer_to_tensor(buffer, width, height) # print("get_spout_image", pixels.shape) - queue.put(pixels, block=False) + + image_rgb_array= image_bgra[:, :, [2, 1, 0]] + pixels=Image.fromarray(image_rgb_array, 'RGB') + queue.put(pixels, block=False) + # Wait until the next frame is ready # Wait time is in milliseconds; note that 0 will return immediately # receiver.waitFrameSync("SpoutSender", 10000) - - + + def randcolor(): @@ -66,38 +79,47 @@ def randcolor(): def tensor_to_spout_image(tensor): image = tensor.squeeze(0) - image = image.permute(1, 2, 0) - image_np = image.cpu().numpy() - - if image_np.min() < 0: - image_np = (image_np + 1) / 2 # Scale from [-1, 1] to [0, 1] - image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) + if image.device.type != "cpu": + image = image.cpu() + image = image.permute(1, 2, 0).numpy() - h, w, _ = image_np.shape - alpha = np.full((h, w, 1), 255, dtype=np.uint8) - image_rgba = np.concatenate((image_np, alpha), axis=-1) + if image.min() < 0: + image = (image + 1) / 2 # Scale from [-1, 1] to [0, 1] + image = np.clip(image * 255, 0, 255).astype(np.uint8) + # h, w, _ = image_np.shape + # alpha = np.full((h, w, 1), 255, dtype=np.uint8) + image_rgba = np.concatenate((image, alpha_cache), axis=-1) image_bgra = image_rgba[..., [2, 1, 0, 3]] + del image # Free memory + return np.ascontiguousarray(image_bgra) # Ensure the array is contiguous in memory def send_spout_image(queue: Queue, width: int, height: int)->None: with SpoutGL.SpoutSender() as sender: sender.setSenderName("StreamDiffusion") - + while True: # Check if there are images in the queue if not queue.empty(): - image = queue.get(block=False) - pixels = tensor_to_spout_image(image) - - result = sender.sendImage(pixels, width, height, GL.GL_RGBA, False, 0) + output_image = queue.get(block=False) + # pixels = tensor_to_spout_image(image) + + output_bgr_array = np.array(output_image, dtype=np.uint8)[:, :, ::-1] + output_bgra_array = np.zeros((SEND_HEIGHT, SEND_WIDTH, 4), dtype=np.uint8) + output_bgra_array[:, :, :3] = output_bgr_array + output_bgra_array[:, :, 3] = 255 + buffer = output_bgra_array + + + result = sender.sendImage(buffer, width, height, GL.GL_RGBA, False, 0) # print("Send result", result) # Indicate that a frame is ready to read sender.setFrameSync("StreamDiffusion") - + # Wait for next send attempt # time.sleep(1./TARGET_FPS) \ No newline at end of file