import os
import re

import torch
from llama_cpp import Llama
import outetts
from diffusers import StableDiffusionXLPipeline, StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
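
# Pipeline overview: generate_story() drafts the narration with a local GGUF
# LLM, text_to_speech() voices it with OuteTTS, stableDiffusion() renders a
# keyframe per scene, and stableVideoDiffusion() animates that keyframe into
# a short clip.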


def loadllama():
    # Load the local GGUF story model through llama-cpp-python;
    # n_gpu_layers=-1 offloads every layer to the GPU.
    llm = Llama(
        model_path="models/.gguf",  # placeholder model filename
        n_ctx=4096,
        n_threads=6,
        n_gpu_layers=-1,
        verbose=False,
    )
    return llm


def loadtts():
    # Configure OuteTTS to run its GGUF speech model on the GPU.
    model_config = outetts.GGUFModelConfig_v1(
        model_path="models/.gguf",  # placeholder model filename
        language="en",
        n_gpu_layers=-1,
        verbose=False,
    )
    interface = outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)
    speaker = interface.load_default_speaker(name="male_1")
    return interface, speaker


def loadsdxl():
    # Load an SDXL checkpoint from a single .safetensors file in fp16.
    # SDXL pipelines do not ship a safety checker, so none is configured.
    pipe = StableDiffusionXLPipeline.from_single_file(
        "models/.safetensors",  # placeholder checkpoint filename
        torch_dtype=torch.float16,
        variant="fp16",
    )
    pipe.enable_xformers_memory_efficient_attention()
    # enable_model_cpu_offload() manages device placement itself, so the
    # pipeline is not moved to CUDA explicitly beforehand.
    pipe.enable_model_cpu_offload()
    return pipe


def loadsvd():
    # Load Stable Video Diffusion (img2vid-xt) for image-to-video generation.
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid-xt",
        torch_dtype=torch.float16,
        variant="fp16",
    )
    pipe.enable_xformers_memory_efficient_attention()
    # As above, CPU offload moves submodules to the GPU on demand.
    pipe.enable_model_cpu_offload()
    return pipe


def generate_story(prompt, sprompt, model):
    # Build a ChatML prompt: system message, user message, then an open
    # assistant turn for the model to complete.
    sys_prompt = "<|im_start|>system\n" + sprompt + "<|im_end|>\n"
    user_prompt = (
        "<|im_start|>user\n"
        + prompt + "<|im_end|>\n"
        + "<|im_start|>assistant\n"
    )
    # Stop on the ChatML end-of-turn tag as well as the EOS token; actual
    # generation is bounded by n_ctx despite the large max_tokens value.
    res = model(
        sys_prompt + user_prompt,
        max_tokens=2**16,
        stop=["</s>", "<|im_end|>"],
        echo=False,
    )
    return str(res["choices"][0]["text"])
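

# For reference, with sprompt="You are a storyteller." and prompt="Tell a
# tale." (illustrative values), generate_story() assembles this ChatML string
# before completion:
#
#   <|im_start|>system
#   You are a storyteller.<|im_end|>
#   <|im_start|>user
#   Tell a tale.<|im_end|>
#   <|im_start|>assistant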


def text_to_speech(prompt, interface, speaker, num):
    # A low temperature keeps the narration voice consistent across chunks;
    # max_length caps the token budget for a single generation.
    output = interface.generate(
        text=prompt,
        temperature=0.1,
        repetition_penalty=1.1,
        max_length=4096,
        speaker=speaker,
    )
    output.save(f"./audios/{num}.wav")


def stableDiffusion(pprompt, nprompt, pipe, num):
    # Let the CUDA allocator grow segments instead of fragmenting VRAM.
    # (Only takes effect if set before the first CUDA allocation.)
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    # 1024x576 matches the 16:9 input resolution SVD img2vid-xt expects.
    image = pipe(
        prompt=pprompt,
        negative_prompt=nprompt,
        num_inference_steps=30,
        height=576,
        width=1024,
    ).images[0]
    image.save(f"./images/{num}.png")


def stableVideoDiffusion(pipe, image, num):
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    image = load_image(image)
    # decode_chunk_size=2 lowers peak VRAM during VAE decoding at the cost of
    # speed; 25 frames at 7 fps yields a clip of roughly 3.5 seconds.
    frames = pipe(image, decode_chunk_size=2, num_frames=25).frames[0]
    export_to_video(frames, f"./videos/{num}.mp4", fps=7)
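

# Minimal end-to-end sketch (not part of the original module). It assumes the
# placeholder model paths above point at real checkpoints, that the ./audios,
# ./images and ./videos directories already exist, and that the prompts below
# are illustrative only.
if __name__ == "__main__":
    llm = loadllama()
    story = generate_story(
        prompt="Write a short story about a lighthouse keeper.",
        sprompt="You are a storyteller. Reply with the story text only.",
        model=llm,
    )

    interface, speaker = loadtts()
    sdxl = loadsdxl()
    paragraphs = [p for p in story.split("\n") if p.strip()]
    for num, paragraph in enumerate(paragraphs):
        # Voice each paragraph and render a matching 16:9 keyframe.
        text_to_speech(paragraph, interface, speaker, num)
        stableDiffusion(paragraph, "blurry, low quality, deformed", sdxl, num)

    svd = loadsvd()
    for num in range(len(paragraphs)):
        # Animate each keyframe into a short clip.
        stableVideoDiffusion(svd, f"./images/{num}.png", num)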