Original

2025-04-20 11:29:26 +02:00 · 2025-04-20 11:29:26 +02:00 · 7e9e55d131
commit 7e9e55d131
11 changed files with 324 additions and 0 deletions
--- a/genXL.py
+++ b/genXL.py
@@ -0,0 +1,82 @@
+import re
+from llama_cpp import Llama
+import outetts
+from diffusers import StableDiffusionXLPipeline
+from diffusers.utils import load_image, export_to_video
+import torch
+import os
+
+def loadllama():
+    """Load the local Llama GGUF model used for text generation.
+
+    Returns:
+        A ``llama_cpp.Llama`` instance built from ``models/llama3.gguf`` with
+        ``n_ctx=4096``, ``n_threads=6``, ``n_gpu_layers=-1`` and quiet logging.
+    """
+    settings = {
+        "model_path": "models/llama3.gguf",
+        "n_ctx": 4096,
+        "n_threads": 6,
+        "n_gpu_layers": -1,
+        "verbose": False,
+    }
+    return Llama(**settings)
+
+def loadtts():
+    """Build the OuteTTS GGUF interface together with its default speaker.
+
+    Returns:
+        A ``(interface, speaker)`` tuple: the ``outetts.InterfaceGGUF`` object
+        created from ``models/tts.gguf`` and the ``male_1`` default speaker
+        loaded through that interface.
+    """
+    cfg = outetts.GGUFModelConfig_v1(
+        model_path="models/tts.gguf",
+        language="en",
+        n_gpu_layers=-1,
+        verbose=False,
+    )
+    tts = outetts.InterfaceGGUF(model_version="0.2", cfg=cfg)
+    return tts, tts.load_default_speaker(name="male_1")
+
+def loadsdxl():
+    """Load the SDXL text-to-image pipeline from a local checkpoint.
+
+    The pipeline is loaded in float16 from ``models/sdxlReal.safetensors``.
+    Device placement is left entirely to ``enable_model_cpu_offload()``:
+    moving the whole pipeline to CUDA first puts every sub-model on the GPU
+    at once, which is the peak-memory spike that offloading exists to avoid.
+
+    Returns:
+        The configured ``StableDiffusionXLPipeline``.
+    """
+    # NOTE(review): ``variant`` and ``requires_safety_checker`` are kept
+    # exactly as before; confirm they have an effect for a single-file SDXL
+    # checkpoint.
+    pipe = StableDiffusionXLPipeline.from_single_file(
+        "models/sdxlReal.safetensors",
+        torch_dtype=torch.float16,
+        variant="fp16",
+        requires_safety_checker=True,
+    )
+    pipe.enable_xformers_memory_efficient_attention()
+    # No explicit pipe.to("cuda"): offloading manages device placement itself.
+    pipe.enable_model_cpu_offload()
+    return pipe
+
+def loadsvd():
+    """Load the Stable Video Diffusion image-to-video pipeline.
+
+    The float16 ``fp16`` variant of
+    ``stabilityai/stable-video-diffusion-img2vid-xt`` is fetched with
+    ``from_pretrained``. Device placement is left to
+    ``enable_model_cpu_offload()`` rather than an up-front move to CUDA, so
+    the full pipeline is never resident on the GPU at once.
+
+    Returns:
+        The configured ``StableVideoDiffusionPipeline``.
+    """
+    # The file's top-level diffusers import only brings in the SDXL pipeline,
+    # so the video pipeline class is imported here to avoid a NameError.
+    from diffusers import StableVideoDiffusionPipeline
+
+    pipe = StableVideoDiffusionPipeline.from_pretrained(
+        "stabilityai/stable-video-diffusion-img2vid-xt",
+        torch_dtype=torch.float16,
+        variant="fp16",
+    )
+    pipe.enable_xformers_memory_efficient_attention()
+    # No explicit pipe.to("cuda"): offloading manages device placement itself.
+    pipe.enable_model_cpu_offload()
+    return pipe
+
+
+def generate_story(prompt, sprompt, model):
+    """Generate text from a system prompt and a user prompt.
+
+    Args:
+        prompt: User message text.
+        sprompt: System message text.
+        model: Callable invoked as ``model(text, max_tokens=..., stop=...,
+            echo=...)`` that returns a mapping containing a ``"choices"`` list.
+
+    Returns:
+        The ``"text"`` field of the first choice, converted to ``str``.
+    """
+    # Assemble the <|im_start|>/<|im_end|> chat template: system turn, user
+    # turn, then the opening of the assistant turn for the model to continue.
+    pieces = [
+        "<|im_start|>system\n", sprompt, "<|im_end|>\n",
+        "<|im_start|>user\n", prompt, "<|im_end|>\n",
+        "<|im_start|>assistant",
+    ]
+    full_prompt = "".join(pieces)
+    completion = model(full_prompt, max_tokens=2**16, stop=["</s>"], echo=False)
+    first_choice = completion["choices"][0]
+    return str(first_choice["text"])
+
+
+def text_to_speech(prompt, interface, speaker, num):
+    """Synthesize speech for ``prompt`` and save it as ``./audios/{num}.wav``.
+
+    Args:
+        prompt: Text to speak.
+        interface: TTS interface exposing ``generate(...)``; the returned
+            object must provide ``save(path)``.
+        speaker: Speaker profile forwarded to ``interface.generate``.
+        num: Identifier used as the output file's base name.
+    """
+    output = interface.generate(
+        text=prompt,
+        temperature=0.1,
+        repetition_penalty=1.1,
+        max_length=4096,
+        speaker=speaker,
+    )
+
+    # Create the output directory up front so a fresh checkout does not fail
+    # on the save with a missing-directory error.
+    os.makedirs("./audios", exist_ok=True)
+    output.save(f"./audios/{num}.wav")
+
+def stableDiffusion(pprompt, nprompt, pipe, num):
+    """Render one 1024x576 image and save it as ``./images/{num}.png``.
+
+    Args:
+        pprompt: Positive prompt.
+        nprompt: Negative prompt.
+        pipe: Callable pipeline whose result exposes an ``images`` sequence.
+        num: Identifier used as the output file's base name.
+    """
+    # NOTE(review): this is set on every call, after the pipeline already
+    # exists; confirm the CUDA allocator still honours it at this point.
+    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
+    result = pipe(
+        prompt=pprompt,
+        negative_prompt=nprompt,
+        num_inference_steps=30,
+        height=576,
+        width=1024,
+    )
+    image = result.images[0]
+    # Create the output directory up front so the save cannot fail on a
+    # missing folder.
+    os.makedirs("./images", exist_ok=True)
+    image.save(f"./images/{num}.png")
+
+def stableVideoDiffusion(pipe, image, num):
+    """Animate ``image`` into a 25-frame clip saved as ``./videos/{num}.mp4``.
+
+    Args:
+        pipe: Callable video pipeline whose result exposes ``frames``.
+        image: Image source passed to ``diffusers.utils.load_image``.
+        num: Identifier used as the output file's base name.
+    """
+    # NOTE(review): this is set on every call, after the pipeline already
+    # exists; confirm the CUDA allocator still honours it at this point.
+    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
+    source = load_image(image)
+    frames = pipe(source, decode_chunk_size=2, num_frames=25).frames[0]
+    # Create the output directory up front so the export cannot fail on a
+    # missing folder.
+    os.makedirs("./videos", exist_ok=True)
+    export_to_video(frames, f"./videos/{num}.mp4", fps=7)
+