reng · 5 months ago
commit e73d66bb7f

Files changed (6):
  .gitignore      (+2)
  app.py          (+198)
  osc.py          (+23)
  perlin.py       (+69)
  scene_prompt.py (+42)
  spout_util.py   (+103)

.gitignore (+2)

@@ -0,0 +1,2 @@
.venv
engines

app.py (+198)

@@ -0,0 +1,198 @@
import os
import sys
import time
from multiprocessing import Queue, get_context

import fire

# StreamDiffusion is expected as a sibling checkout of this repository.
sys.path.insert(0, os.path.abspath('../StreamDiffusion'))
from utils.wrapper import StreamDiffusionWrapper
# from utils.viewer import receive_images  # optional local viewer, see main()

from osc import start_osc_server
from spout_util import get_spout_image, send_spout_image
# from scene_prompt import surreal_prompts, surreal_prompt_parts, regret_prompts  # preset prompt lists


def image_generation_process(
    queue: Queue,
    fps_queue: Queue,
    prompt_queue: Queue,
    input_queue: Queue,
    model_id_or_path: str,
) -> None:
    # Alternative configuration (xformers acceleration, multi-step sampling):
    # stream = StreamDiffusionWrapper(
    #     model_id_or_path=model_id_or_path,
    #     lora_dict=None,
    #     t_index_list=[0, 16, 32, 45],
    #     frame_buffer_size=1,
    #     width=512,
    #     height=512,
    #     warmup=10,
    #     acceleration="xformers",
    #     mode="txt2img",
    #     use_denoising_batch=False,
    #     cfg_type="none",
    #     seed=2,
    # )
    stream = StreamDiffusionWrapper(
        model_id_or_path=model_id_or_path,
        t_index_list=[0],
        frame_buffer_size=1,
        warmup=10,
        acceleration="tensorrt",
        use_lcm_lora=False,
        mode="txt2img",
        cfg_type="none",
        use_denoising_batch=True,
    )

    start_prompt = "A glowing, vintage phone booth standing in surreal landscapes across different scenes"
    stream.prepare(
        prompt=start_prompt,
        num_inference_steps=4,
    )

    while True:
        start_time = time.time()
        # Block until the Spout receiver delivers the next input frame.
        input_image = input_queue.get(block=True)

        # Swap in a new prompt if one has arrived over OSC; otherwise keep
        # generating with the current one.
        new_prompt = None
        if not prompt_queue.empty():
            new_prompt = prompt_queue.get(block=False)
        if new_prompt:
            print(f"Received new prompt from queue: {new_prompt}")
            x_output = stream.img2img(image=input_image, prompt=new_prompt)
        else:
            x_output = stream.img2img(image=input_image)

        preprocessed_image = stream.preprocess_image(x_output)
        queue.put(preprocessed_image, block=False)

        # Report throughput for monitoring.
        elapsed_time = time.time() - start_time
        fps = 1 / elapsed_time if elapsed_time > 0 else float('inf')
        fps_queue.put(fps)


def main() -> None:
    try:
        ctx = get_context('spawn')
        queue = Queue()
        fps_queue = Queue()
        spout_in_queue = Queue()
        prompt_queue = Queue()

        # model_id_or_path = "KBlueLeaf/kohaku-v2.1"
        model_id_or_path = "stabilityai/sd-turbo"

        process_osc = ctx.Process(
            target=start_osc_server,
            args=(prompt_queue,),
        )
        process_osc.start()

        print("Starting spout input process")
        process_spout_in = ctx.Process(
            target=get_spout_image,
            args=(spout_in_queue, 512, 512),
        )
        process_spout_in.start()

        print("Starting image generation process")
        process_gen = ctx.Process(
            target=image_generation_process,
            args=(queue, fps_queue, prompt_queue, spout_in_queue, model_id_or_path),
        )
        process_gen.start()

        # Optional local viewer instead of the Spout output:
        # process_show = ctx.Process(target=receive_images, args=(queue, fps_queue))
        # process_show.start()

        print("Starting spout output process")
        process_spout_out = ctx.Process(target=send_spout_image, args=(queue, 512, 512))
        process_spout_out.start()

        process_gen.join()
        process_spout_in.join()
        process_spout_out.join()
        process_osc.join()
    except KeyboardInterrupt:
        print("Process interrupted")
        process_gen.terminate()
        process_spout_in.terminate()
        process_spout_out.terminate()
        process_osc.terminate()
        return


if __name__ == "__main__":
    fire.Fire(main)
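
A quick way to exercise this pipeline without a Spout source is to feed a synthetic frame straight into the input queue. The sketch below is not part of the commit (the helper name smoke_test is hypothetical), and it still needs a CUDA GPU plus built TensorRT engines for the wrapper to load:

import torch
from multiprocessing import get_context

def smoke_test(model_id_or_path: str = "stabilityai/sd-turbo") -> None:
    ctx = get_context("spawn")
    queue, fps_queue = ctx.Queue(), ctx.Queue()
    prompt_queue, input_queue = ctx.Queue(), ctx.Queue()
    # One gray 512x512 frame in the (1, 3, H, W) float layout that
    # spout_buffer_to_tensor produces.
    input_queue.put(torch.full((1, 3, 512, 512), 0.5))
    proc = ctx.Process(
        target=image_generation_process,
        args=(queue, fps_queue, prompt_queue, input_queue, model_id_or_path),
    )
    proc.start()
    print("generated frame:", type(queue.get()), "fps:", fps_queue.get())
    proc.terminate()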

osc.py (+23)

@@ -0,0 +1,23 @@
from pythonosc.dispatcher import Dispatcher
from pythonosc import osc_server

OSC_PORT = 8787


def start_osc_server(queue):
    def on_receive_prompt(address, *args):
        # OSC arguments are not guaranteed to be strings (ints and floats
        # are valid), so coerce before joining.
        prompt = " ".join(str(arg) for arg in args)
        print(f"Received prompt: {prompt}")
        queue.put(prompt)

    dispatcher = Dispatcher()
    dispatcher.map("/prompt", on_receive_prompt)
    server = osc_server.ThreadingOSCUDPServer(("localhost", OSC_PORT), dispatcher)
    print(f"OSC server is running on port {OSC_PORT}")
    server.serve_forever()
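
Prompts can be pushed to this server with python-osc's bundled client; a minimal sketch (the prompt string is only an example):

from pythonosc.udp_client import SimpleUDPClient

client = SimpleUDPClient("127.0.0.1", 8787)  # must match OSC_PORT above
client.send_message("/prompt", "a surreal landscape of floating islands")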

perlin.py (+69)

@@ -0,0 +1,69 @@
import math

import torch


def rand_perlin_2d(shape, res, fade=lambda t: 6 * t**5 - 15 * t**4 + 10 * t**3):
    delta = (res[0] / shape[0], res[1] / shape[1])
    d = (shape[0] // res[0], shape[1] // res[1])
    grid = torch.stack(
        torch.meshgrid(torch.arange(0, res[0], delta[0]),
                       torch.arange(0, res[1], delta[1]),
                       indexing='ij'),  # explicit 'ij' keeps the pre-1.10 default, without the warning
        dim=-1,
    ) % 1
    angles = 2 * math.pi * torch.rand(res[0] + 1, res[1] + 1)
    gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim=-1)

    tile_grads = lambda s1, s2: gradients[s1[0]:s1[1], s2[0]:s2[1]] \
        .repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)
    dot = lambda grad, shift: (
        torch.stack((grid[:shape[0], :shape[1], 0] + shift[0],
                     grid[:shape[0], :shape[1], 1] + shift[1]), dim=-1)
        * grad[:shape[0], :shape[1]]
    ).sum(dim=-1)

    n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
    n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
    n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
    n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])
    t = fade(grid[:shape[0], :shape[1]])
    return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]),
                                     torch.lerp(n01, n11, t[..., 0]), t[..., 1])


def rand_perlin_2d_octaves(shape, res, octaves=1, persistence=0.5):
    noise = torch.zeros(shape)
    frequency = 1
    amplitude = 1
    for _ in range(octaves):
        noise += amplitude * rand_perlin_2d(shape, (frequency * res[0], frequency * res[1]))
        frequency *= 2
        amplitude *= persistence
    return noise


def perlin_2d(shape, res, seed, fade=lambda t: 6 * t**5 - 15 * t**4 + 10 * t**3):
    delta = (res[0] / shape[0], res[1] / shape[1])
    d = (shape[0] // res[0], shape[1] // res[1])
    grid = torch.stack(
        torch.meshgrid(torch.arange(0, res[0], delta[0]),
                       torch.arange(0, res[1], delta[1]),
                       indexing='ij'),
        dim=-1,
    ) % 1

    # A fractional seed blends between the gradient fields of the two
    # neighbouring integer seeds, so sweeping the seed animates smoothly.
    base_seed = int(seed)
    frac_seed = seed - base_seed
    torch.manual_seed(base_seed)
    angles_base = 2 * math.pi * torch.rand(res[0] + 1, res[1] + 1)
    gradients_base = torch.stack((torch.cos(angles_base), torch.sin(angles_base)), dim=-1)
    torch.manual_seed(base_seed + 1)
    angles_next = 2 * math.pi * torch.rand(res[0] + 1, res[1] + 1)
    gradients_next = torch.stack((torch.cos(angles_next), torch.sin(angles_next)), dim=-1)
    gradients = (1 - frac_seed) * gradients_base + frac_seed * gradients_next

    tile_grads = lambda s1, s2: gradients[s1[0]:s1[1], s2[0]:s2[1]] \
        .repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)
    dot = lambda grad, shift: (
        torch.stack((grid[:shape[0], :shape[1], 0] + shift[0],
                     grid[:shape[0], :shape[1], 1] + shift[1]), dim=-1)
        * grad[:shape[0], :shape[1]]
    ).sum(dim=-1)

    n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
    n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
    n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
    n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])
    t = fade(grid[:shape[0], :shape[1]])
    return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]),
                                     torch.lerp(n01, n11, t[..., 0]), t[..., 1])


def perlin_2d_octaves(shape, res, seed, octaves=1, persistence=0.5,
                      fade=lambda t: 6 * t**5 - 15 * t**4 + 10 * t**3):
    noise = torch.zeros(shape)
    frequency = 1
    amplitude = 1
    for i in range(octaves):
        noise += amplitude * perlin_2d(shape, (frequency * res[0], frequency * res[1]), seed + i, fade)
        frequency *= 2
        amplitude *= persistence
    return noise
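
A usage sketch for the seeded variant above (not from the commit): because a fractional seed interpolates between two gradient fields, sweeping the seed animates the noise smoothly.

# 30 frames of smoothly evolving fractal noise; (512, 512) is divisible by
# every octave resolution (8, 16, 32, 64) used here.
frames = [perlin_2d_octaves((512, 512), (8, 8), step / 30.0, octaves=4)
          for step in range(30)]
print(frames[0].shape, float(frames[0].min()), float(frames[0].max()))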

scene_prompt.py (+42)

@@ -0,0 +1,42 @@
surreal_prompts = [
    "a surreal landscape of floating islands under a glowing sky",
    "an ethereal valley where waterfalls rise into the clouds",
    "a dreamlike desert with mirrored sand and hovering stones",
    "an endless ocean reflecting fractured moons and stars",
    "a neon-lit canyon with levitating ruins and glowing mist",
    "a twilight forest where the trees grow upside-down",
    "a luminous terrain with bioluminescent plants and crystal arches",
    "a gravity-defying mountain range spiraling into the void",
    "a shattered realm of glass bridges and hovering towers",
    "an alien world lit by pulsating constellations and fluid geometry",
]

surreal_prompt_parts = [
    "a surreal landscape",
    "with floating islands",
    "glowing waterfalls",
    "neon-colored skies",
    "mirror-like desert ground",
    "levitating rocks",
    "upside-down trees",
    "ancient ruins suspended in air",
    "bioluminescent flora",
    "shattered moons overhead",
    "a path of glass tiles",
    "crystal towers emitting soft hums",
    "gravity-defying rivers",
    "alien constellations glowing brightly",
]

regret_prompts = [
    "a lone figure standing in a vast, empty desert at dusk",
    "fractured mirrors scattered across the sand, reflecting different memories",
    "a withered tree growing upside down from the sky, its roots dripping ink",
    "floating clocks melting into the horizon, ticking backwards",
    "ghostly silhouettes walking in reverse, retracing forgotten steps",
    "a house half-submerged in water, its windows glowing faintly with past laughter",
    "the sky opens into a tunnel of old photographs slowly burning at the edges",
    "giant stone hands reaching out from the earth, trying to grasp something lost",
    "an ocean made of letters never sent, waves crashing with whispered apologies",
    "a child version of the figure stands alone, staring at the adult with distant eyes",
]
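
One way these lists can be combined at runtime, sketched here as an assumption rather than anything the commit does: keep the base phrase and sample a few modifiers from surreal_prompt_parts.

import random

def random_surreal_prompt(n_parts: int = 4) -> str:
    base, *modifiers = surreal_prompt_parts
    return ", ".join([base] + random.sample(modifiers, n_parts))

print(random_surreal_prompt())
# e.g. "a surreal landscape, upside-down trees, glowing waterfalls, levitating rocks, bioluminescent flora"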

spout_util.py (+103)

@@ -0,0 +1,103 @@
import array
from multiprocessing import Queue

import numpy as np
import SpoutGL
import torch
from OpenGL import GL


def spout_buffer_to_tensor(buffer, width, height):
    """Convert a BGRA byte buffer from Spout into a (1, 3, H, W) float tensor in [0, 1]."""
    np_buffer = np.asarray(buffer, dtype=np.uint8)
    image_bgra = np_buffer.reshape((height, width, 4))
    image_rgb = image_bgra[..., [2, 1, 0]]  # drop alpha, reorder BGR -> RGB
    image_float = image_rgb.astype(np.float32) / 255.0
    tensor = torch.from_numpy(image_float).permute(2, 0, 1)
    return tensor.unsqueeze(0)


def get_spout_image(queue, wwidth: int, wheight: int) -> None:
    # Note: the requested wwidth/wheight are currently unused; the receiver
    # adopts whatever size the sender reports.
    with SpoutGL.SpoutReceiver() as receiver:
        receiver.setReceiverName("Spout DX11 Sender")
        buffer = None
        width = height = 0
        while True:
            result = receiver.receiveImage(buffer, GL.GL_RGBA, False, 0)
            if receiver.isUpdated():
                # The sender (re)connected or changed size: reallocate the buffer.
                width = receiver.getSenderWidth()
                height = receiver.getSenderHeight()
                buffer = array.array('B', [0] * (width * height * 4))
                print("Spout receiver updated, buffer size", width, height)
            if buffer and result and not SpoutGL.helpers.isBufferEmpty(buffer):
                pixels = spout_buffer_to_tensor(buffer, width, height)
                queue.put(pixels, block=False)


def tensor_to_spout_image(tensor):
    """Convert a (1, 3, H, W) tensor into a contiguous BGRA uint8 array for Spout."""
    image = tensor.squeeze(0).permute(1, 2, 0)
    image_np = image.cpu().numpy()
    if image_np.min() < 0:
        image_np = (image_np + 1) / 2  # scale from [-1, 1] to [0, 1]
    image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)
    h, w, _ = image_np.shape
    alpha = np.full((h, w, 1), 255, dtype=np.uint8)
    image_rgba = np.concatenate((image_np, alpha), axis=-1)
    image_bgra = image_rgba[..., [2, 1, 0, 3]]
    return np.ascontiguousarray(image_bgra)  # Spout expects a contiguous buffer


def send_spout_image(queue: Queue, width: int, height: int) -> None:
    with SpoutGL.SpoutSender() as sender:
        sender.setSenderName("StreamDiffusion")
        while True:
            # Block until the generator produces the next frame (avoids the
            # busy-wait of polling queue.empty()).
            image = queue.get(block=True)
            pixels = tensor_to_spout_image(image)
            sender.sendImage(pixels, width, height, GL.GL_RGBA, False, 0)
            # Signal to receivers that a new frame is ready.
            sender.setFrameSync("StreamDiffusion")
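
A roundtrip sanity check for the two converters above (illustrative, not part of the commit): a tensor in [0, 1] should survive tensor -> BGRA bytes -> tensor up to uint8 rounding.

import torch

t = torch.rand(1, 3, 64, 64)
bgra = tensor_to_spout_image(t)                       # (64, 64, 4) uint8 BGRA
back = spout_buffer_to_tensor(bgra, 64, 64)           # back to (1, 3, 64, 64)
print(torch.allclose(t, back, atol=1 / 255, rtol=0))  # expect True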