Spaces:

AiSudo
/

LongCat-Image

Running on Zero

File size: 7,366 Bytes

53a42bb

import gradio as gr
import numpy as np
import os, random, json, spaces, torch, time, subprocess

import torch
from transformers import AutoProcessor
from longcat_image.models import LongCatImageTransformer2DModel
from longcat_image.pipelines import LongCatImagePipeline

from utils.image_utils import rescale_image
from utils.prompt_utils import polish_prompt


# GIT_DIR = "LongCat-Image"
# GIT_URL = "https://github.com/yourusername/LongCat-Image.git"

# if not os.path.isdir(GIT_DIR):
#     subprocess.run(["git", "clone", GIT_URL])
# else:
#     print("Folder already exists.")


def prepare(prompt, is_polish_prompt):
    """Pick the prompt text handed to the generation step.

    Args:
        prompt: Raw prompt text entered by the user.
        is_polish_prompt: Truthy when the prompt should be rewritten with
            ``polish_prompt`` first.

    Returns:
        A ``(text, polished)`` pair: the rewritten prompt and ``True`` when
        polishing was requested, otherwise the untouched prompt and ``False``.
    """
    if is_polish_prompt:
        return polish_prompt(prompt), True
    return prompt, False

@spaces.GPU
def inference(
    prompt,
    negative_prompt,
    input_image,
    image_scale=1.0,
    control_mode='Canny',
    control_context_scale = 0.75,
    seed=42,
    randomize_seed=True,
    guidance_scale=1.5,
    num_inference_steps=8,
    progress=gr.Progress(track_tqdm=True),
):
    """Generation entry point wired to the "Generate" button (currently a stub).

    The real pipeline is disabled (kept below as comments), so no image is
    produced. The function still returns one value per output component it
    is wired to, so the event chain does not fail on an output-count mismatch.

    Args:
        prompt: Prompt text (the polished prompt in the UI wiring).
        negative_prompt: Negative prompt text. Unused while stubbed.
        input_image: Uploaded image. Unused while stubbed.
        image_scale: Image scale factor. Unused while stubbed.
        control_mode: Selected control mode name. Unused while stubbed.
        control_context_scale: Control context scale. Unused while stubbed.
        seed: Seed value; returned unchanged.
        randomize_seed: Whether to randomize the seed. Unused while stubbed.
        guidance_scale: Guidance scale. Unused while stubbed.
        num_inference_steps: Number of steps. Unused while stubbed.
        progress: Gradio progress tracker default. Unused while stubbed.

    Returns:
        A 3-tuple ``(image, seed, control_image)``. While the pipeline is
        disabled, ``image`` and ``control_image`` are ``None``.
    """
    # NOTE: disabled implementation, kept for reference. It relies on names
    # that are not defined in this file (Processor, get_image_latent, pipe,
    # MAX_SEED) and must be restored together with them.
    #
    # timestamp = time.time()
    # print(f"timestamp: {timestamp}")

    # # process image
    # print("DEBUG: process image")
    # if input_image is None:
    #     print("Error: input_image is empty.")
    #     return None, seed, None

    # # input_image, width, height = scale_image(input_image, image_scale)
    # # control_mode='HED'
    # processor_id = 'canny'
    # if control_mode == 'HED':
    #     processor_id = 'softedge_hed'
    # if control_mode =='Depth':
    #     processor_id = 'depth_midas'
    # if control_mode =='MLSD':
    #     processor_id = 'mlsd'
    # if control_mode =='Pose':
    #     processor_id = 'openpose_full'

    # print(f"DEBUG: processor_id={processor_id}")
    # processor = Processor(processor_id)

    # # Width must be divisible by 16
    # control_image, width, height = rescale_image(input_image, image_scale, 16)
    # control_image = control_image.resize((1024, 1024))

    # print("DEBUG: processor running")
    # control_image = processor(control_image, to_pil=True)
    # control_image = control_image.resize((width, height))

    # print("DEBUG: control_image_torch")
    # control_image_torch = get_image_latent(control_image, sample_size=[height, width])[:, :, 0]

    # # generation
    # if randomize_seed: seed = random.randint(0, MAX_SEED)
    # generator = torch.Generator().manual_seed(seed)

    # image = pipe(
    #     prompt=prompt,
    #     negative_prompt = negative_prompt,
    #     height=height,
    #     width=width,
    #     generator=generator,
    #     guidance_scale=guidance_scale,
    #     control_image=control_image_torch,
    #     num_inference_steps=num_inference_steps,
    #     control_context_scale=control_context_scale,
    # ).images[0]

    # return image, seed, control_image

    # One value per wired output component: (output_image, seed, control_image).
    return None, seed, None


def read_file(path: str) -> str:
    """Return the entire contents of the UTF-8 text file at ``path``."""
    with open(path, mode='r', encoding='utf-8') as handle:
        return handle.read()


# Page-level CSS: centers the main column and caps its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 960px;
}
"""

# Example rows for the gallery. Read as UTF-8 explicitly so decoding does not
# depend on the platform's default locale (and matches read_file).
with open('static/data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
examples = data['examples']

# Upper bound for the seed slider. The original referenced MAX_SEED without
# defining it, which raised NameError while building the UI.
MAX_SEED = np.iinfo(np.int32).max

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        with gr.Column():
            gr.HTML(read_file("static/header.html"))
        with gr.Row():
            # Left column: inputs and generation settings.
            with gr.Column():
                input_image = gr.Image(
                    height=290, sources=['upload', 'clipboard'],
                    image_mode='RGB',
                    # elem_id="image_upload",
                    type="pil", label="Upload")

                prompt = gr.Textbox(
                    label="Prompt",
                    show_label=False,
                    lines=2,
                    placeholder="Enter your prompt",
                    # container=False,
                )
                is_polish_prompt = gr.Checkbox(label="Polish prompt", value=True)
                control_mode = gr.Radio(
                    choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
                    value="Canny",
                    label="Control Mode"
                )
                run_button = gr.Button("Generate", variant="primary")
                with gr.Accordion("Advanced Settings", open=False):

                    negative_prompt = gr.Textbox(
                        label="Negative prompt",
                        lines=2,
                        container=False,
                        placeholder="Enter your negative prompt",
                        value="blurry ugly bad"
                    )
                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            label="Steps",
                            minimum=1,
                            maximum=30,
                            step=1,
                            value=9,
                        )
                        control_context_scale = gr.Slider(
                            label="Context scale",
                            minimum=0.0,
                            maximum=1.0,
                            step=0.01,
                            value=0.75,
                        )

                    with gr.Row():
                        guidance_scale = gr.Slider(
                            label="Guidance scale",
                            minimum=0.0,
                            maximum=10.0,
                            step=0.1,
                            value=1.0,
                        )

                        image_scale = gr.Slider(
                            label="Image scale",
                            minimum=0.5,
                            maximum=2.0,
                            step=0.1,
                            value=1.0,
                        )

                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=42,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=False)

            # Right column: generated image, polished prompt, control preview.
            with gr.Column():
                output_image = gr.Image(label="Generated image", show_label=False)
                polished_prompt = gr.Textbox(label="Polished prompt", interactive=False)

                with gr.Accordion("Preprocessor output", open=False):
                    control_image = gr.Image(label="Control image", show_label=False)

        gr.Examples(examples=examples, inputs=[input_image, prompt, control_mode])
        gr.Markdown(read_file("static/footer.md"))

    # Two-step chain: prepare() fills the polished-prompt box first, then
    # inference() reads that box as its prompt input.
    run_button.click(
        fn=prepare,
        inputs=[prompt, is_polish_prompt],
        outputs=[polished_prompt, is_polish_prompt]
        # outputs=gr.State(),  # Pass to the next function, not to UI at this step
    ).then(
        fn=inference,
        inputs=[
            polished_prompt,
            negative_prompt,
            input_image,
            image_scale,
            control_mode,
            control_context_scale,
            seed,
            randomize_seed,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[output_image, seed, control_image],
    )

# Launch the app only when this file is executed directly, not on import.
# mcp_server=True is forwarded to launch(); presumably it also exposes the
# app as an MCP server -- confirm against the installed Gradio version.
if __name__ == "__main__":
    demo.launch(mcp_server=True)