VideoStory/gen.py
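"""Generation helpers for the VideoStory pipeline.

Loads a llama.cpp language model (story text), OuteTTS (narration audio),
Stable Diffusion (keyframe images), and Stable Video Diffusion (short clips),
and exposes one helper per generation step. The GGUF/safetensors model paths
below are placeholders and must point at real local model files.
"""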
import re
import os

import torch
from llama_cpp import Llama
import outetts
from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video

def loadllama():
    llm = Llama(
        model_path="models/.gguf",
        n_ctx=4096,
        n_threads=6,
        n_gpu_layers=-1,
        verbose=False
    )
    return llm

def loadtts():
    model_config = outetts.GGUFModelConfig_v1(
        model_path="models/.gguf",
        language="en",
        n_gpu_layers=-1,
        verbose=False,
    )
    interface = outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)
    speaker = interface.load_default_speaker(name="male_1")
    return interface, speaker

def loadsdxl():
    # Load a Stable Diffusion checkpoint from a single .safetensors file.
    pipe = StableDiffusionPipeline.from_single_file(
        "models/.safetensors", torch_dtype=torch.float32, safety_checker=None
    )
    pipe.enable_xformers_memory_efficient_attention()
    # Model CPU offload manages device placement itself, so an explicit
    # pipe.to("cuda") beforehand is redundant.
    pipe.enable_model_cpu_offload()
    return pipe

def loadsvd():
    # Load Stable Video Diffusion (img2vid-xt) in fp16 for image-to-video generation.
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
    )
    pipe.to("cuda")
    return pipe

def generate_story(prompt, sprompt, model):
    # Assemble a ChatML-style prompt (system + user) and let the LLM write the story.
    sys_prompt = "<|im_start|>system\n" + sprompt + "<|im_end|>\n"
    user_prompt = (
        "<|im_start|>user\n"
        + prompt + "<|im_end|>\n"
        + "<|im_start|>assistant"
    )
    res = model(sys_prompt + user_prompt, max_tokens=2**16, stop=["</s>"], echo=False)
    return str(res["choices"][0]["text"])

def text_to_speech(prompt, interface, speaker, num):
    # Synthesize narration for one segment and save it as a numbered WAV file.
    output = interface.generate(
        text=prompt,
        temperature=0.1,
        repetition_penalty=1.1,
        max_length=4096,
        speaker=speaker,
    )
    output.save(f"./audios/{num}.wav")

def stableDiffusion(pprompt, nprompt, pipe, num):
    # Generate one 768x512 keyframe image and save it as a numbered PNG.
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    image = pipe(prompt=pprompt, negative_prompt=nprompt, num_inference_steps=60, height=512, width=768).images[0]
    image.save(f"./images/{num}.png")

def stableVideoDiffusion(pipe, image, num):
    # Animate a still image into a short clip and export it as a numbered MP4.
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    image = load_image(image)
    frames = pipe(image, decode_chunk_size=2, num_frames=25).frames[0]
    export_to_video(frames, f"./videos/{num}.mp4", fps=7)
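

# --- Minimal usage sketch (illustrative, not from the original file) ---
# A hedged example of how these helpers could be wired together: generate a
# story, split it into paragraphs, then produce narration audio, a keyframe
# image, and a short video clip per paragraph. The prompt texts, the
# paragraph-per-scene split, and the output directories are assumptions;
# the model paths in the loaders above are placeholders that must point at
# real local files. Loading the LLM, TTS, SD, and SVD models at once may not
# fit on smaller GPUs; in practice they could be loaded and released per stage.
if __name__ == "__main__":
    for d in ("audios", "images", "videos"):
        os.makedirs(d, exist_ok=True)

    llm = loadllama()
    story = generate_story(
        prompt="Write a short illustrated story about a lighthouse keeper.",
        sprompt="You are a storyteller. Write vivid, self-contained paragraphs.",
        model=llm,
    )

    # One scene per non-empty paragraph (assumed convention).
    scenes = [p.strip() for p in re.split(r"\n\s*\n", story) if p.strip()]

    interface, speaker = loadtts()
    sd_pipe = loadsdxl()
    svd_pipe = loadsvd()

    for num, scene in enumerate(scenes):
        text_to_speech(scene, interface, speaker, num)
        # The scene text doubles as the image prompt here; SD truncates long prompts.
        stableDiffusion(scene, "blurry, low quality", sd_pipe, num)
        stableVideoDiffusion(svd_pipe, f"./images/{num}.png", num)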