Compare commits
No commits in common. "23b261710bfd7eba2617ca9ee92c90db56fdfc46" and "75cd94244a358afa9fedacf52271f68db804d606" have entirely different histories.
8 changed files with 0 additions and 303 deletions
5  clean.py
@@ -1,5 +0,0 @@
import re
def clean(input_text):
    output_text = re.sub(r'keywords:', ', ', input_text)
    output_text = re.sub(r'\n', ', ', output_text)  # chain on output_text so the first substitution is not discarded
    return output_text
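For reference, a minimal usage sketch of clean() follows; the input string is invented for illustration and assumes the chained substitution above:

# Hypothetical input, invented for illustration
text = "keywords: dragon, castle\nA knight rides at dawn."
print(clean(text))
# prints: ", dragon, castle, A knight rides at dawn."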
80  gen.py
@@ -1,80 +0,0 @@
import re
from llama_cpp import Llama
import outetts
# StableVideoDiffusionPipeline added to the import so loadsvd() below resolves
from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
import torch
import os


def loadllama():
    llm = Llama(
        model_path="models/llama3.gguf",
        n_ctx=4096,
        n_threads=6,
        n_gpu_layers=-1,
        verbose=False
    )

    return llm


def loadtts():
    model_config = outetts.GGUFModelConfig_v1(
        model_path="models/tts.gguf",
        language="en",
        n_gpu_layers=-1,
        verbose=False,
    )

    interface = outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)
    speaker = interface.load_default_speaker(name="male_1")
    return interface, speaker


def loadsdxl():
    # Loads the SD 1.5-style checkpoint from a single .safetensors file
    pipe = StableDiffusionPipeline.from_single_file("models/revAnimated_v2Rebirth.safetensors", torch_dtype=torch.float32, safety_checker=None)
    pipe.to("cuda")
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()
    return pipe


def loadsvd():
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
    )
    pipe.to("cuda")
    return pipe


def generate_story(prompt, sprompt, model):
    sys_prompt = ("<|im_start|>system\n" + sprompt + "<|im_end|>\n")
    user_prompt = (
        "<|im_start|>user\n"
        + prompt + "<|im_end|>\n"
        + "<|im_start|>assistant"
    )
    res = model(sys_prompt + user_prompt, max_tokens=2**16, stop=["</s>"], echo=False)
    return str(res["choices"][0]["text"])


def text_to_speech(prompt, interface, speaker, num):
    output = interface.generate(
        text=prompt,
        temperature=0.1,
        repetition_penalty=1.1,
        max_length=4096,
        speaker=speaker,
    )

    output.save(f"./audios/{num}.wav")


def stableDiffusion(pprompt, nprompt, pipe, num):
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    images = pipe(prompt=pprompt, negative_prompt=nprompt, num_inference_steps=60, height=512, width=768).images[0]
    images.save(f"./images/{num}.png")


def stableVideoDiffusion(pipe, image, num):
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    image = load_image(image)
    frames = pipe(image, decode_chunk_size=2, num_frames=25).frames[0]
    export_to_video(frames, f"./videos/{num}.mp4", fps=7)
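Worth noting: generate_story() (identical here and in genXL.py below) assembles a ChatML-style prompt by plain string concatenation, so for a system prompt SPROMPT and a user prompt PROMPT the text handed to the Llama model is:

<|im_start|>system
SPROMPT<|im_end|>
<|im_start|>user
PROMPT<|im_end|>
<|im_start|>assistant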
82  genXL.py
@@ -1,82 +0,0 @@
import re
from llama_cpp import Llama
import outetts
# StableVideoDiffusionPipeline added to the import so loadsvd() below resolves
from diffusers import StableDiffusionXLPipeline, StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
import torch
import os


def loadllama():
    llm = Llama(
        model_path="models/llama3.gguf",
        n_ctx=4096,
        n_threads=6,
        n_gpu_layers=-1,
        verbose=False
    )

    return llm


def loadtts():
    model_config = outetts.GGUFModelConfig_v1(
        model_path="models/tts.gguf",
        language="en",
        n_gpu_layers=-1,
        verbose=False,
    )

    interface = outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)
    speaker = interface.load_default_speaker(name="male_1")
    return interface, speaker


def loadsdxl():
    pipe = StableDiffusionXLPipeline.from_single_file("models/sdxlReal.safetensors", torch_dtype=torch.float16, variant="fp16", requires_safety_checker=True)
    pipe.to("cuda")
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()
    return pipe


def loadsvd():
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
    )
    pipe.to("cuda")
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()
    return pipe


def generate_story(prompt, sprompt, model):
    sys_prompt = ("<|im_start|>system\n" + sprompt + "<|im_end|>\n")
    user_prompt = (
        "<|im_start|>user\n"
        + prompt + "<|im_end|>\n"
        + "<|im_start|>assistant"
    )
    res = model(sys_prompt + user_prompt, max_tokens=2**16, stop=["</s>"], echo=False)
    return str(res["choices"][0]["text"])


def text_to_speech(prompt, interface, speaker, num):
    output = interface.generate(
        text=prompt,
        temperature=0.1,
        repetition_penalty=1.1,
        max_length=4096,
        speaker=speaker,
    )

    output.save(f"./audios/{num}.wav")


def stableDiffusion(pprompt, nprompt, pipe, num):
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    images = pipe(prompt=pprompt, negative_prompt=nprompt, num_inference_steps=30, height=576, width=1024).images[0]
    images.save(f"./images/{num}.png")


def stableVideoDiffusion(pipe, image, num):
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    image = load_image(image)
    frames = pipe(image, decode_chunk_size=2, num_frames=25).frames[0]
    export_to_video(frames, f"./videos/{num}.mp4", fps=7)
66  main.py
@@ -1,66 +0,0 @@
import gen
import torch
import video
import os


SYSTEMPROMPTT = ""
SYSTEMPROMPTI = ""
SDBAD = ""

with open("promptUtoT.txt", "r") as file:
    SYSTEMPROMPTT = file.readlines()[0]

with open("promptTtoI.txt", "r") as file:
    SYSTEMPROMPTI = file.readlines()[0]


def main(prompt):

    llm = gen.loadllama()

    # Generate the story and split it into non-empty scene paragraphs
    raw_text = gen.generate_story(prompt, SYSTEMPROMPTT, model=llm)
    image_prompts = []
    raw_text = raw_text.split("\n")
    raw_text = [item for item in raw_text if item != ""]

    # Turn every scene into an image prompt, prepending earlier scenes as context
    for i in range(len(raw_text)):
        promptimg = "Context:\n"
        for l in range(0, i - 1):
            promptimg += raw_text[l] + " "
        promptimg += "Current Scene:\n" + raw_text[i]
        image_prompts.append(gen.generate_story(promptimg, SYSTEMPROMPTI, model=llm).strip("\n"))

    llm, speaker = gen.loadtts()

    # Narrate each scene; skip segments the TTS model fails on
    for i in range(len(raw_text)):
        try:
            gen.text_to_speech(raw_text[i], llm, speaker, i)
        except Exception:
            pass

    del llm
    del speaker
    torch.cuda.empty_cache()

    llm = gen.loadsdxl()

    for elem in range(len(image_prompts)):
        gen.stableDiffusion(image_prompts[elem], SDBAD, llm, elem)

    del llm
    torch.cuda.empty_cache()

    # Image-to-video pass, currently disabled
    """
    llm = gen.loadsvd()

    for elem in range(len([name for name in os.listdir('./images')])):
        gen.stableVideoDiffusion(llm, f"./images/{elem}.png", elem)

    del llm
    torch.cuda.empty_cache()
    """

    video.create_video(".", ".", "out.mp4")

    return None


main("")
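Pieced together from the paths referenced in these scripts, the working directory they expect looks roughly like this (a sketch inferred from the code, not checked against the original repository layout):

models/llama3.gguf                        # Llama GGUF used by loadllama()
models/tts.gguf                           # OuteTTS GGUF used by loadtts()
models/revAnimated_v2Rebirth.safetensors  # SD 1.5 checkpoint (gen.py)
models/sdxlReal.safetensors               # SDXL checkpoint (genXL.py)
promptUtoT.txt                            # system prompt for story generation
promptTtoI.txt                            # system prompt for image prompts
images/  audios/  videos/                 # per-scene outputs, numbered 0.png, 0.wav, ...
out.mp4                                   # final video written by video.create_video()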
BIN  out.mp4
Binary file not shown.
1  promptTtoI.txt
@@ -1 +0,0 @@
You are an AI prompt engineer for Stable Diffusion 1.5.
1  promptUtoT.txt
@@ -1 +0,0 @@
You are a highly skilled and versatile writer, renowned for your ability to craft captivating stories across all genres. Your work is celebrated globally and has been translated into over 73 languages. As a master storyteller, you create intricate, detailed narratives that engage readers and evoke strong emotions. Your writing is characterized by its originality and depth, ensuring that each piece is unique and memorable.
68  video.py
@@ -1,68 +0,0 @@
import os
from moviepy import *


def create_video(image_folder, audio_folder, output_video):
    # Build matching lists of numbered images and audio files
    images = []
    audio_files = []
    for i in range(len([name for name in os.listdir('./images')])):
        images.append(f"./images/{i}.png")
        audio_files.append(f"./audios/{i}.wav")

    clips = []

    for img, audio in zip(images, audio_files):
        # Create an ImageClip
        img_path = os.path.join(image_folder, img)
        image_clip = ImageClip(img_path)

        # Create an AudioFileClip
        audio_path = os.path.join(audio_folder, audio)
        audio_clip = AudioFileClip(audio_path)
        audio_clip = CompositeAudioClip([audio_clip])

        # Set the duration of the image clip to the duration of the audio clip
        image_clip = image_clip.with_duration(audio_clip.duration)

        # Set the audio of the image clip
        image_clip.audio = audio_clip

        # Append the image clip to the list of clips
        clips.append(image_clip)

    # Concatenate all the clips into a single video
    final_video = concatenate_videoclips(clips, method="compose")

    # Write the result to a file
    final_video.write_videofile(output_video, codec='libx264', audio_codec='aac', fps=24)


# Earlier draft that stitched the SVD clips instead of still images, kept disabled
"""
import os
from moviepy import *

def create_video_from_images_and_audio(image_folder, audio_folder, output_video):
    video_files = []
    audio_files = []
    for i in range(len([name for name in os.listdir('./images')])):
        video_files.append(f"./videos/{i}.mp4")
        audio_files.append(f"./audios/{i}.wav")

    audio_clip = AudioFileClip(audio_file)
    audio_duration = audio_clip.duration

    clips = []

    for video, audio in zip(video_files, audio_files):
        video_clip = VideoFileClip(video_path)
        audio_clip = AudioFileClip(audio_path)
        if video_clip.duration < audio_clip.duration:
            loops_needed = int(audio_clip.duration // video_clip.duration) + 1
            video_clip = video_clip.loop(loops=loops_needed)
        video_clip = video_clip.subclip(0, audio_clip.duration)
        video_clip = video_clip.set_audio(audio_clip)
        clips.append(video_clip)

    final_video = concatenate_videoclips(clips, method="compose")
    final_video.write_videofile(output_video, codec='libx264', audio_codec='aac', fps=24)
"""
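Note that create_video() only uses its folder arguments as path prefixes; the file lists are hard-coded to ./images/<i>.png and ./audios/<i>.wav, so passing "." (as main.py does) keeps them pointing at the working directory. A minimal standalone invocation, mirroring the existing call in main.py:

# Mirrors the call made at the end of main.py
import video
video.create_video(".", ".", "out.mp4")  # reads the numbered files in ./images and ./audios, writes out.mp4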