Compare commits
4 commits
75cd94244a
...
23b261710b
| Author | SHA1 | Date |
|---|---|---|
| | 23b261710b | |
| | 100348f0a4 | |
| | 62815fe58c | |
| | 7e9e55d131 | |
8 changed files with 303 additions and 0 deletions
5
clean.py
Normal file
@@ -0,0 +1,5 @@
import re
def clean(input_text):
    output_text = re.sub(r'keywords:', ', ', input_text)
    output_text = re.sub(r'\n', ', ', output_text)
    return output_text
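As a quick illustration (a sketch, not part of the commit): clean() replaces the literal "keywords:" label and all newlines with comma separators, flattening the model output into one line. The sample text below is made up.

```python
# Hypothetical usage of clean.py, assuming it is run from the repository root.
from clean import clean

sample = "keywords: dragons, castles\nA knight rides out at dawn.\nThe dragon wakes."
print(clean(sample))
# Roughly: ", dragons, castles, A knight rides out at dawn., The dragon wakes."
```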
80
gen.py
Normal file
@@ -0,0 +1,80 @@
import re
from llama_cpp import Llama
import outetts
from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
import torch
import os


def loadllama():
    # Load the Llama 3 GGUF checkpoint with llama-cpp-python
    llm = Llama(
        model_path="models/llama3.gguf",
        n_ctx=4096,
        n_threads=6,
        n_gpu_layers=-1,
        verbose=False
    )

    return llm


def loadtts():
    # Load the OuteTTS GGUF model and its default speaker profile
    model_config = outetts.GGUFModelConfig_v1(
        model_path="models/tts.gguf",
        language="en",
        n_gpu_layers=-1,
        verbose=False,
    )

    interface = outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)
    speaker = interface.load_default_speaker(name="male_1")
    return interface, speaker


def loadsdxl():
    # Despite the name, this loads a Stable Diffusion 1.5 checkpoint (revAnimated)
    pipe = StableDiffusionPipeline.from_single_file("models/revAnimated_v2Rebirth.safetensors", torch_dtype=torch.float32, safety_checker=None)
    pipe.to("cuda")
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()
    return pipe


def loadsvd():
    # Load Stable Video Diffusion for image-to-video generation
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
    )
    pipe.to("cuda")
    return pipe


def generate_story(prompt, sprompt, model):
    # Wrap the system and user prompts in ChatML-style tags and run the LLM
    sys_prompt = ("<|im_start|>system\n" + sprompt + "<|im_end|>\n")
    user_prompt = (
        "<|im_start|>user\n"
        + prompt + "<|im_end|>\n"
        + "<|im_start|>assistant"
    )
    res = model(sys_prompt + user_prompt, max_tokens=2**16, stop=["</s>"], echo=False)
    return str(res["choices"][0]["text"])


def text_to_speech(prompt, interface, speaker, num):
    # Synthesize speech for one scene and save it as ./audios/{num}.wav
    output = interface.generate(
        text=prompt,
        temperature=0.1,
        repetition_penalty=1.1,
        max_length=4096,
        speaker=speaker,
    )

    output.save(f"./audios/{num}.wav")


def stableDiffusion(pprompt, nprompt, pipe, num):
    # Generate one 768x512 image for the scene and save it as ./images/{num}.png
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    images = pipe(prompt=pprompt, negative_prompt=nprompt, num_inference_steps=60, height=512, width=768).images[0]
    images.save(f"./images/{num}.png")


def stableVideoDiffusion(pipe, image, num):
    # Animate a still image with SVD and export ./videos/{num}.mp4
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    image = load_image(image)
    frames = pipe(image, decode_chunk_size=2, num_frames=25).frames[0]
    export_to_video(frames, f"./videos/{num}.mp4", fps=7)
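For reference, generate_story() wraps the system and user prompts in ChatML-style tags before calling the Llama instance. A minimal sketch of the assembled prompt (the prompt strings here are placeholders, not from the commit):

```python
# Illustrative only: the prompt string generate_story() builds for llama-cpp-python.
sprompt = "You are a storyteller."       # placeholder system prompt
prompt = "Write a two-sentence story."   # placeholder user prompt

full_prompt = (
    "<|im_start|>system\n" + sprompt + "<|im_end|>\n"
    "<|im_start|>user\n" + prompt + "<|im_end|>\n"
    "<|im_start|>assistant"
)
print(full_prompt)
# The Llama object from loadllama() is then called directly:
#   res = llm(full_prompt, max_tokens=2**16, stop=["</s>"], echo=False)
```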
82
genXL.py
Normal file
@@ -0,0 +1,82 @@
import re
from llama_cpp import Llama
import outetts
from diffusers import StableDiffusionXLPipeline, StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
import torch
import os


def loadllama():
    # Load the Llama 3 GGUF checkpoint with llama-cpp-python
    llm = Llama(
        model_path="models/llama3.gguf",
        n_ctx=4096,
        n_threads=6,
        n_gpu_layers=-1,
        verbose=False
    )

    return llm


def loadtts():
    # Load the OuteTTS GGUF model and its default speaker profile
    model_config = outetts.GGUFModelConfig_v1(
        model_path="models/tts.gguf",
        language="en",
        n_gpu_layers=-1,
        verbose=False,
    )

    interface = outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)
    speaker = interface.load_default_speaker(name="male_1")
    return interface, speaker


def loadsdxl():
    # Load an SDXL checkpoint from a single .safetensors file
    pipe = StableDiffusionXLPipeline.from_single_file("models/sdxlReal.safetensors", torch_dtype=torch.float16, variant="fp16", requires_safety_checker=True)
    pipe.to("cuda")
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()
    return pipe


def loadsvd():
    # Load Stable Video Diffusion for image-to-video generation
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
    )
    pipe.to("cuda")
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()
    return pipe


def generate_story(prompt, sprompt, model):
    # Wrap the system and user prompts in ChatML-style tags and run the LLM
    sys_prompt = ("<|im_start|>system\n" + sprompt + "<|im_end|>\n")
    user_prompt = (
        "<|im_start|>user\n"
        + prompt + "<|im_end|>\n"
        + "<|im_start|>assistant"
    )
    res = model(sys_prompt + user_prompt, max_tokens=2**16, stop=["</s>"], echo=False)
    return str(res["choices"][0]["text"])


def text_to_speech(prompt, interface, speaker, num):
    # Synthesize speech for one scene and save it as ./audios/{num}.wav
    output = interface.generate(
        text=prompt,
        temperature=0.1,
        repetition_penalty=1.1,
        max_length=4096,
        speaker=speaker,
    )

    output.save(f"./audios/{num}.wav")


def stableDiffusion(pprompt, nprompt, pipe, num):
    # Generate one 1024x576 image for the scene and save it as ./images/{num}.png
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    images = pipe(prompt=pprompt, negative_prompt=nprompt, num_inference_steps=30, height=576, width=1024).images[0]
    images.save(f"./images/{num}.png")


def stableVideoDiffusion(pipe, image, num):
    # Animate a still image with SVD and export ./videos/{num}.mp4
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    image = load_image(image)
    frames = pipe(image, decode_chunk_size=2, num_frames=25).frames[0]
    export_to_video(frames, f"./videos/{num}.mp4", fps=7)
66
main.py
Normal file
@@ -0,0 +1,66 @@
import gen
import torch
import video
import os


SYSTEMPROMPTT = ""
SYSTEMPROMPTI = ""
SDBAD = ""

# System prompts: user request -> story text, and story text -> image prompt
with open("promptUtoT.txt", "r") as file:
    SYSTEMPROMPTT = file.readlines()[0]

with open("promptTtoI.txt", "r") as file:
    SYSTEMPROMPTI = file.readlines()[0]


def main(prompt):

    llm = gen.loadllama()

    # Generate the story and split it into non-empty scene paragraphs
    raw_text = gen.generate_story(prompt, SYSTEMPROMPTT, model=llm)
    image_prompts = []
    raw_text = raw_text.split("\n")
    raw_text = [item for item in raw_text if item != ""]

    # Build one image prompt per scene, passing the earlier scenes as context
    for i in range(len(raw_text)):
        promptimg = "Context:\n"
        for l in range(0, i - 1):
            promptimg += raw_text[l] + " "
        promptimg += "Current Scene:\n" + raw_text[i]
        image_prompts.append(gen.generate_story(promptimg, SYSTEMPROMPTI, model=llm).strip("\n"))

    llm, speaker = gen.loadtts()

    # Narrate each scene; skip scenes the TTS model fails on
    for i in range(len(raw_text)):
        try:
            gen.text_to_speech(raw_text[i], llm, speaker, i)
        except Exception:
            pass

    del llm
    del speaker
    torch.cuda.empty_cache()

    llm = gen.loadsdxl()

    # Render one image per scene
    for elem in range(len(image_prompts)):
        gen.stableDiffusion(image_prompts[elem], SDBAD, llm, elem)

    del llm
    torch.cuda.empty_cache()

    # Image-to-video step, currently disabled
    """
    llm = gen.loadsvd()

    for elem in range(len([name for name in os.listdir('./images')])):
        gen.stableVideoDiffusion(llm, f"./images/{elem}.png", elem)

    del llm
    torch.cuda.empty_cache()
    """

    # Assemble the narrated slideshow into out.mp4
    video.create_video(".", ".", "out.mp4")

    return None


main("")
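Note that main.py writes into ./audios, ./images and ./videos but nothing in the commit creates those folders. A minimal setup sketch, assuming you run from the repository root with the GGUF/safetensors checkpoints already placed under models/:

```python
# Hypothetical setup step before running main.py (not part of this commit).
import os

for folder in ("audios", "images", "videos"):
    os.makedirs(folder, exist_ok=True)  # output folders the pipeline writes to
```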
BIN
out.mp4
Normal file
Binary file not shown.
1
promptTtoI.txt
Normal file
@@ -0,0 +1 @@
You are an AI prompt engineer for Stable Diffusion 1.5.
1
promptUtoT.txt
Normal file
@@ -0,0 +1 @@
You are a highly skilled and versatile writer, renowned for your ability to craft captivating stories across all genres. Your work is celebrated globally and has been translated into over 73 languages. As a master storyteller, you create intricate, detailed narratives that engage readers and evoke strong emotions. Your writing is characterized by its originality and depth, ensuring that each piece is unique and memorable.
68
video.py
Normal file
@@ -0,0 +1,68 @@
import os
from moviepy import *


def create_video(image_folder, audio_folder, output_video):
    # Build index-ordered lists of image and audio files (0.png/0.wav, 1.png/1.wav, ...)
    images = []
    audio_files = []
    for i in range(len([name for name in os.listdir('./images')])):
        images.append(f"./images/{i}.png")
        audio_files.append(f"./audios/{i}.wav")

    clips = []

    for img, audio in zip(images, audio_files):
        # Create an ImageClip
        img_path = os.path.join(image_folder, img)
        image_clip = ImageClip(img_path)

        # Create an AudioFileClip
        audio_path = os.path.join(audio_folder, audio)
        audio_clip = AudioFileClip(audio_path)
        audio_clip = CompositeAudioClip([audio_clip])

        # Set the duration of the image clip to the duration of the audio clip
        image_clip = image_clip.with_duration(audio_clip.duration)

        # Set the audio of the image clip
        image_clip.audio = audio_clip

        # Append the image clip to the list of clips
        clips.append(image_clip)

    # Concatenate all the clips into a single video
    final_video = concatenate_videoclips(clips, method="compose")

    # Write the result to a file
    final_video.write_videofile(output_video, codec='libx264', audio_codec='aac', fps=24)


# Earlier draft that stitched the SVD clips from ./videos instead of still images; currently unused
"""
import os
from moviepy import *

def create_video_from_images_and_audio(image_folder, audio_folder, output_video):
    video_files = []
    audio_files = []
    for i in range(len([name for name in os.listdir('./images')])):
        video_files.append(f"./videos/{i}.mp4")
        audio_files.append(f"./audios/{i}.wav")

    clips = []

    for video, audio in zip(video_files, audio_files):
        video_clip = VideoFileClip(video)
        audio_clip = AudioFileClip(audio)
        # Loop the clip if it is shorter than its narration, then trim to the audio length
        if video_clip.duration < audio_clip.duration:
            loops_needed = int(audio_clip.duration // video_clip.duration) + 1
            video_clip = video_clip.loop(loops=loops_needed)
        video_clip = video_clip.subclip(0, audio_clip.duration)
        video_clip = video_clip.set_audio(audio_clip)
        clips.append(video_clip)

    final_video = concatenate_videoclips(clips, method="compose")
    final_video.write_videofile(output_video, codec='libx264', audio_codec='aac', fps=24)
"""