diff --git a/app.py b/app.py
index aacccc4..2ae5850 100644
--- a/app.py
+++ b/app.py
@@ -60,16 +60,17 @@ def image_generation_process(
         warmup=10,
         acceleration="tensorrt",
         use_lcm_lora=False,
-        mode="txt2img",
+        mode="img2img",
         cfg_type="none",
         use_denoising_batch=True,
+        output_type="pil",
     )
      
     start_prompt = "A glowing, vintage phone booth standing in surreal landscapes across different scene"
     # Prepare the stream
     stream.prepare(
         prompt=start_prompt,
-        num_inference_steps=4,
+        num_inference_steps=50,
     )
 
     # Prepare image
@@ -92,6 +93,7 @@ def image_generation_process(
       
 
         input_image= input_queue.get(block=True)
+        # input_image = stream.preprocess_image('input.png')
 
         
         # Check if a new prompt is available in the prompt_queue
@@ -106,9 +108,9 @@ def image_generation_process(
 
 
 
-        preprocessed_image =stream.preprocess_image(x_output)
+        # preprocessed_image =stream.postprocess_image(x_output)
        
-        queue.put(preprocessed_image, block=False)
+        queue.put(x_output, block=False)
 
         # queue.put(preprocessed_image, block=False)
 
@@ -171,13 +173,13 @@ def main()-> None:
         # process_show=ctx.Process(target=receive_images, args=(queue, fps_queue))
         # process_show.start()
 
-        print("Starting spout output process")
+        # print("Starting spout output process")
         process_spout_out=ctx.Process(target=send_spout_image, args=(queue, 512, 512))
         process_spout_out.start()
 
 
         process_gen.join()
-        process_spout_in.join()
+        # process_spout_in.join()
         process_spout_out.join()
         process_osc.join()
 
@@ -186,7 +188,7 @@ def main()-> None:
         print("Process interrupted")
         
         process_gen.terminate()
-        process_spout_in.terminate()
+        # process_spout_in.terminate()
         process_spout_out.terminate()
         process_osc.terminate()
 
diff --git a/img2img.py b/img2img.py
new file mode 100644
index 0000000..5adaf98
--- /dev/null
+++ b/img2img.py
@@ -0,0 +1,112 @@
+import sys
+import os
+
+sys.path.append(
+    os.path.join(
+        os.path.dirname(__file__),
+        "..",
+        "..",
+    )
+)
+
+from utils.wrapper import StreamDiffusionWrapper
+
+import torch
+
+# from config import Args
+from pydantic import BaseModel, Field
+from PIL import Image
+import math
+
+# base_model = "stabilityai/sd-turbo"
+# taesd_model = "madebyollin/taesd"
+base_model = "./models/sd-turbo"
+taesd_model = "./models/taesd"
+
+default_prompt = "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
+default_negative_prompt = "black and white, blurry, low resolution, pixelated,  pixel art, low quality, low fidelity"
+
+page_content = """<h1 class="text-3xl font-bold">StreamDiffusion</h1>
+<h3 class="text-xl font-bold">Image-to-Image SD-Turbo</h3>
+<p class="text-sm">
+    This demo showcases
+    <a
+    href="https://github.com/cumulo-autumn/StreamDiffusion"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">StreamDiffusion
+</a>
+Image to Image pipeline using
+    <a
+    href="https://huggingface.co/stabilityai/sd-turbo"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">SD-Turbo</a
+    > with a MJPEG stream server.
+</p>
+"""
+
+
+class Pipeline:
+    """Image-to-image StreamDiffusion pipeline built on the SD-Turbo weights."""
+
+    class Info(BaseModel):
+        # Static metadata describing this pipeline.
+        name: str = "StreamDiffusion img2img"
+        input_mode: str = "image"
+        page_content: str = page_content
+
+    class InputParams(BaseModel):
+        # Per-call parameters accepted by predict().
+        prompt: str = Field(
+            default_prompt,
+            title="Prompt",
+            field="textarea",
+            id="prompt",
+        )
+        width: int = Field(
+            512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
+        )
+        height: int = Field(
+            512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
+        )
+
+    def __init__(self, device: torch.device, torch_dtype: torch.dtype):
+        """Create the stream wrapper on ``device`` and prepare the defaults."""
+        params = self.InputParams()
+        self.stream = StreamDiffusionWrapper(
+            model_id_or_path=base_model,
+            use_tiny_vae=True,
+            device=device,
+            dtype=torch_dtype,
+            t_index_list=[35, 45],
+            frame_buffer_size=1,
+            width=params.width,
+            height=params.height,
+            use_lcm_lora=False,
+            output_type="pil",
+            warmup=10,
+            vae_id=taesd_model,
+            acceleration="xformers",
+            mode="img2img",
+            use_denoising_batch=True,
+            cfg_type="none",
+            enable_similar_image_filter=True,
+            similar_image_filter_threshold=0.98,
+        )
+
+        # Tracks the prompt the stream currently holds; see predict().
+        self.last_prompt = default_prompt
+        self.stream.prepare(
+            prompt=default_prompt,
+            negative_prompt=default_negative_prompt,
+            num_inference_steps=50,
+            guidance_scale=1.2,
+        )
+
+    def predict(self, image: Image.Image, params: "Pipeline.InputParams") -> Image.Image:
+        """Run one img2img pass on ``image`` using ``params.prompt``."""
+        image_tensor = self.stream.preprocess_image(image)
+        # Hand the prompt to the wrapper only when it changed, so an unchanged
+        # prompt is not submitted again on every frame.
+        prompt = params.prompt if params.prompt != self.last_prompt else None
+        self.last_prompt = params.prompt
+        return self.stream(image=image_tensor, prompt=prompt)
\ No newline at end of file
diff --git a/input.png b/input.png
new file mode 100644
index 0000000..3d6f015
Binary files /dev/null and b/input.png differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..627a68d
--- /dev/null
+++ b/main.py
@@ -0,0 +1,120 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+import datetime
+import torch
+from PIL import Image
+import numpy as np
+import SpoutGL
+from OpenGL.GL import GL_RGBA
+import time
+import img2img
+
+def main():
+    """Run the Spout -> StreamDiffusion img2img -> Spout loop.
+
+    Also serves a FastAPI app (health check, prompt update) on a daemon thread.
+    """
+    TARGET_FPS = 60
+    SPOUT_RECEIVER_NAME = "Spout DX11 Sender"
+    SPOUT_SENDER_NAME = "Output - StreamDiffusion"
+    WIDTH = 512
+    HEIGHT = 512
+    PROMPT = "a beautiful landscape painting, trending on artstation, 8k, hyperrealistic"
+    timestamp = datetime.datetime.now()
+    fps = 30.0
+
+    print("Initializing StreamDiffusion pipeline...")
+    global pipeline
+    try:
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        torch_dtype = torch.float16
+        pipeline = img2img.Pipeline(device, torch_dtype)
+
+        app = FastAPI()
+
+        @app.get("/health")
+        def read_root():
+            return {"status": "ok"}
+
+        class PromptUpdate(BaseModel):
+            prompt: str
+
+        @app.post("/api/update/prompt")
+        async def update_prompt(update: PromptUpdate):
+            # PROMPT is a local of main(); ``global`` would bind a separate
+            # module-level name that the render loop below never reads.
+            nonlocal PROMPT
+            PROMPT = update.prompt
+            print(f"Prompt updated to: {PROMPT}")
+            return {"message": "Prompt updated successfully", "new_prompt": PROMPT}
+
+        print("Pipeline initialized.")
+    except Exception as e:
+        print(f"Error initializing StreamDiffusion pipeline: {e}")
+        return
+
+    print(f"Initializing Spout receiver for '{SPOUT_RECEIVER_NAME}'...")
+    spout_receiver = SpoutGL.SpoutReceiver()
+    spout_receiver.setReceiverName(SPOUT_RECEIVER_NAME)
+
+    print(f"Initializing Spout sender as '{SPOUT_SENDER_NAME}'...")
+    spout_sender = SpoutGL.SpoutSender()
+    spout_sender.setSenderName(SPOUT_SENDER_NAME)
+
+    image_bgra = np.zeros((HEIGHT, WIDTH, 4), dtype=np.uint8)
+
+    import uvicorn
+    import threading
+    config = uvicorn.Config(app, host="0.0.0.0", port=34800, log_level="info")
+    server = uvicorn.Server(config)
+    threading.Thread(target=server.run, daemon=True).start()
+    print("FastAPI server started at http://0.0.0.0:34800")
+
+    try:
+        print("Starting main loop. Press Ctrl+C to exit.")
+        while True:
+            received = spout_receiver.receiveImage(image_bgra, GL_RGBA, False, 0)
+            if received:
+                if spout_receiver.isUpdated():
+                    continue
+
+                if spout_receiver.isConnected() and SpoutGL.helpers.isBufferEmpty(image_bgra):
+                    continue
+
+                # NOTE(review): the buffer is requested as GL_RGBA yet indexed as
+                # BGRA here and below -- confirm the channel order end to end.
+                image_rgb_array = image_bgra[:, :, [2,1,0]]
+                input_image = Image.fromarray(image_rgb_array, 'RGB')
+
+                params = img2img.Pipeline.InputParams(prompt=PROMPT)
+                output_image = pipeline.predict(image=input_image, params=params)
+
+                # Reverse the channel axis and fill the fourth channel with 255.
+                output_bgr_array = np.array(output_image, dtype=np.uint8)[:, :, ::-1]
+                output_bgra_array = np.zeros((HEIGHT, WIDTH, 4), dtype=np.uint8)
+                output_bgra_array[:, :, :3] = output_bgr_array
+                output_bgra_array[:, :, 3] = 255
+                buffer = output_bgra_array
+
+                spout_sender.sendImage(buffer, WIDTH, HEIGHT, GL_RGBA, False, 0)
+
+                # dt is clamped so a zero interval cannot divide by zero.
+                dt = max((datetime.datetime.now() - timestamp).total_seconds(), 1e-6)
+                t = 0.05
+                fps = fps * t + 1 / dt * (1 - t)
+                timestamp = datetime.datetime.now()
+                print("\033[92m[ STREAM DIFFUSION ]\033[0m " + f"Frame processed and sent to Spout: {fps:.2f}", end="\r", flush=True)
+            else:
+                time.sleep(1. / TARGET_FPS)
+
+    except KeyboardInterrupt:
+        print("\nExiting...")
+    finally:
+        print("Releasing Spout resources.")
+        spout_receiver.releaseReceiver()
+        spout_sender.releaseSender()
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/requirements.txt b/requirements.txt
index f6a6498..edc7a75 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -45,8 +45,8 @@ streamdiffusion @ git+https://github.com/cumulo-autumn/StreamDiffusion.git@b6232
 sympy==1.13.3
 termcolor==3.1.0
 tokenizers==0.15.2
-torch==2.1.0+cu121
-torchvision==0.16.0+cu121
+torch==2.1.0
+torchvision==0.16.0
 tqdm==4.67.1
 transformers==4.35.2
 twython==3.9.1
diff --git a/spout_util.py b/spout_util.py
index fc4b15d..a0aa628 100644
--- a/spout_util.py
+++ b/spout_util.py
@@ -9,55 +9,68 @@ from OpenGL import GL
 
 from multiprocessing import Queue
 import numpy as np
-
+from PIL import Image
 
 
 TARGET_FPS = 30
 SEND_WIDTH = 512
 SEND_HEIGHT = 512
 
-
+alpha_cache = np.full((SEND_HEIGHT, SEND_WIDTH, 1), 255, dtype=np.uint8)  # all-255 alpha plane
 
 
 
 def spout_buffer_to_tensor(buffer, width, height):
-    np_buffer = np.asarray(buffer, dtype=np.uint8)
+    """Convert a raw 4-channel Spout pixel buffer into a float image tensor.
+
+    ``buffer`` must expose ``height * width * 4`` bytes. Channels 2, 1, 0 are
+    kept (the fourth is dropped) and scaled to [0, 1]; the result is a
+    float32 tensor of shape ``(1, 3, height, width)``.
+    """
+    # frombuffer wraps any buffer-protocol object without copying it.
+    np_buffer = np.frombuffer(buffer, dtype=np.uint8)
     image_bgra = np_buffer.reshape((height, width, 4))
 
     image_rgb = image_bgra[..., [2, 1, 0]]
     image_float = image_rgb.astype(np.float32) / 255.0
     # image_normalized = (image_float * 2.0) - 1.0
     tensor = torch.from_numpy(image_float).permute(2, 0, 1)
 
     return tensor.unsqueeze(0)
 
 
 def get_spout_image(queue, wwidth: int, wheight: int) -> None:
+    """Receive frames from the "Spout DX11 Sender" source and queue them.
+
+    Each usable frame is put on ``queue`` as an RGB-mode ``PIL.Image``; ``wwidth``
+    and ``wheight`` are unused (the buffer is SEND_WIDTH x SEND_HEIGHT).
+    """
     with SpoutGL.SpoutReceiver() as receiver:
         receiver.setReceiverName("Spout DX11 Sender")
-
-        buffer = None
+        image_bgra = np.zeros((SEND_HEIGHT, SEND_WIDTH, 4), dtype=np.uint8)
 
         while True:
-            result = receiver.receiveImage(buffer, GL.GL_RGBA, False, 0)
+            result = receiver.receiveImage(image_bgra, GL.GL_RGBA, False, 0)
             # print("Receive result", result)
 
             if receiver.isUpdated():
-                width = receiver.getSenderWidth()
-                height = receiver.getSenderHeight()
-                buffer = array.array('B', [0] * (width * height * 4))  # Correctly reallocate buffer with updated size
-                print("Spout Receiver updated, Buffer size", width, height)
-
-            if buffer and result and not SpoutGL.helpers.isBufferEmpty(buffer):
-                pixels=spout_buffer_to_tensor(buffer, width, height)
+                # Fixed-size buffer: skip the update frame rather than reallocating.
+                continue
+
+            # A failed receive can leave the previous frame in the reused buffer.
+            if not result or SpoutGL.helpers.isBufferEmpty(image_bgra):
+                continue
                 # print("get_spout_image", pixels.shape)
-                queue.put(pixels, block=False)
+
+            image_rgb_array = image_bgra[:, :, [2, 1, 0]]
+            pixels = Image.fromarray(image_rgb_array, 'RGB')
+            queue.put(pixels, block=False)
 
             # Wait until the next frame is ready
             # Wait time is in milliseconds; note that 0 will return immediately
             # receiver.waitFrameSync("SpoutSender", 10000)
-        
-    
+
+
 
 
 def randcolor():
@@ -66,38 +79,47 @@ def randcolor():
 
 def tensor_to_spout_image(tensor):
+    """Turn a (1, 3, H, W) image tensor into an (H, W, 4) uint8 pixel array."""
     image = tensor.squeeze(0)
-    image = image.permute(1, 2, 0)
-    image_np = image.cpu().numpy()
-
-    if image_np.min() < 0:
-        image_np = (image_np + 1) / 2  # Scale from [-1, 1] to [0, 1]
-    image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)
+    image = image.permute(1, 2, 0).cpu().numpy()
+
+    if image.min() < 0:
+        image = (image + 1) / 2  # Scale from [-1, 1] to [0, 1]
+    image = np.clip(image * 255, 0, 255).astype(np.uint8)
 
-    h, w, _ = image_np.shape
-    alpha = np.full((h, w, 1), 255, dtype=np.uint8)
-    image_rgba = np.concatenate((image_np, alpha), axis=-1)
+    # Size alpha from the image itself: the fixed 512x512 module-level
+    # alpha_cache makes np.concatenate fail for any other resolution.
+    h, w, _ = image.shape
+    alpha = np.full((h, w, 1), 255, dtype=np.uint8)
+    image_rgba = np.concatenate((image, alpha), axis=-1)
 
     image_bgra = image_rgba[..., [2, 1, 0, 3]]
 
     return np.ascontiguousarray(image_bgra)  # Ensure the array is contiguous in memory
 
 def send_spout_image(queue: Queue, width: int, height: int)->None:
+    """Publish images from ``queue`` as the "StreamDiffusion" Spout sender.
+
+    Queue items are expected to convert to ``(height, width, 3)`` uint8 arrays.
+    """
     
     with SpoutGL.SpoutSender() as sender:
         sender.setSenderName("StreamDiffusion")
-    
+
         while True:
 
             # Check if there are images in the queue
             if not queue.empty():
-                image = queue.get(block=False)
-                pixels = tensor_to_spout_image(image)
-
-                result = sender.sendImage(pixels, width, height, GL.GL_RGBA, False, 0)
+                output_image = queue.get(block=False)
+                # Size the buffer from the arguments also given to sendImage
+                # (not SEND_WIDTH/SEND_HEIGHT) so the two cannot disagree.
+                buffer = np.empty((height, width, 4), dtype=np.uint8)
+                buffer[:, :, :3] = np.array(output_image, dtype=np.uint8)[:, :, ::-1]
+                buffer[:, :, 3] = 255
+                result = sender.sendImage(buffer, width, height, GL.GL_RGBA, False, 0)
                 # print("Send result", result)
                 
                 # Indicate that a frame is ready to read
                 sender.setFrameSync("StreamDiffusion")
-                
+
                 # Wait for next send attempt
                 # time.sleep(1./TARGET_FPS)
\ No newline at end of file