import os

# Must be set before the first CUDA allocation to take effect, so it is set
# at import time rather than inside the generation helpers.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch
import outetts
from llama_cpp import Llama
from diffusers import StableDiffusionXLPipeline, StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video


def loadllama():
    # Load a local GGUF chat model; n_gpu_layers=-1 offloads all layers to the GPU.
    llm = Llama(
        model_path="models/.gguf",
        n_ctx=4096,
        n_threads=6,
        n_gpu_layers=-1,
        verbose=False,
    )
    return llm


def loadtts():
    model_config = outetts.GGUFModelConfig_v1(
        model_path="models/.gguf",
        language="en",
        n_gpu_layers=-1,
        verbose=False,
    )
    interface = outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)
    speaker = interface.load_default_speaker(name="male_1")
    return interface, speaker


def loadsdxl():
    # SDXL pipelines have no built-in safety checker, so requires_safety_checker
    # is dropped here.
    pipe = StableDiffusionXLPipeline.from_single_file(
        "models/.safetensors",
        torch_dtype=torch.float16,
        variant="fp16",
    )
    pipe.enable_xformers_memory_efficient_attention()
    # Model CPU offload manages device placement itself; calling pipe.to("cuda")
    # first would defeat the offload, so it is omitted.
    pipe.enable_model_cpu_offload()
    return pipe


def loadsvd():
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid-xt",
        torch_dtype=torch.float16,
        variant="fp16",
    )
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()
    return pipe


def generate_story(prompt, sprompt, model):
    # ChatML-style prompt; generation stops at the assistant's end-of-turn tag.
    sys_prompt = "<|im_start|>system\n" + sprompt + "<|im_end|>\n"
    user_prompt = (
        "<|im_start|>user\n" + prompt + "<|im_end|>\n" + "<|im_start|>assistant"
    )
    # max_tokens is capped by n_ctx (4096) regardless of the value passed here.
    res = model(
        sys_prompt + user_prompt,
        max_tokens=2**16,
        stop=["<|im_end|>"],
        echo=False,
    )
    return str(res["choices"][0]["text"])


def text_to_speech(prompt, interface, speaker, num):
    output = interface.generate(
        text=prompt,
        temperature=0.1,
        repetition_penalty=1.1,
        max_length=4096,
        speaker=speaker,
    )
    os.makedirs("./audios", exist_ok=True)
    output.save(f"./audios/{num}.wav")


def stableDiffusion(pprompt, nprompt, pipe, num):
    # 1024x576 matches the 16:9 frame size Stable Video Diffusion expects.
    image = pipe(
        prompt=pprompt,
        negative_prompt=nprompt,
        num_inference_steps=30,
        height=576,
        width=1024,
    ).images[0]
    os.makedirs("./images", exist_ok=True)
    image.save(f"./images/{num}.png")


def stableVideoDiffusion(pipe, image, num):
    image = load_image(image)
    # A small decode_chunk_size trades decoding speed for lower VRAM use.
    frames = pipe(image, decode_chunk_size=2, num_frames=25).frames[0]
    os.makedirs("./videos", exist_ok=True)
    export_to_video(frames, f"./videos/{num}.mp4", fps=7)
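

# --- Usage sketch (not part of the original script) ---
# None of the helpers above are invoked anywhere, so a minimal, hypothetical
# driver is sketched below to show how they compose: generate a story, split
# it into scenes, then narrate, illustrate, and animate each scene. The prompt
# strings, the sentence-based scene split, and the negative prompt are
# assumptions for illustration only, not the author's pipeline.
if __name__ == "__main__":
    llm = loadllama()
    story = generate_story(
        "Write a short story about a lighthouse keeper.",   # hypothetical user prompt
        "You are a storyteller. Write vivid, visual scenes.",  # hypothetical system prompt
        llm,
    )

    # Naive scene split: one scene per sentence (assumption).
    scenes = [s.strip() for s in story.split(".") if s.strip()]

    interface, speaker = loadtts()
    sdxl = loadsdxl()
    svd = loadsvd()

    for i, scene in enumerate(scenes):
        text_to_speech(scene, interface, speaker, i)
        stableDiffusion(scene, "blurry, low quality", sdxl, i)  # hypothetical negative prompt
        stableVideoDiffusion(svd, f"./images/{i}.png", i)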