# Origin: commit 2499fb4e1910d31ff12c24110f161b20641b8835
# Author: Raphael Stoeckli, Wed, 5 Oct 2022 21:57:18 +0200
# Subject: Add sanitizer for captions in Textual inversion
# File:    modules/textual_inversion/preprocess.py
#
# The patch adds `sanitize_caption` (below) and calls it from
# `save_pic_with_caption` inside `preprocess`, right after the caption is
# generated:
#
#     caption = "-" + shared.interrogator.generate_caption(image)
#     caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png")
#
# It also adds the module-level imports `import platform`, `import sys` and
# `from cmath import log`; the last one is not used by any code visible in
# the patch.
import platform
import sys


def sanitize_caption(base_path, original_caption, suffix):
    """Make a caption safe to embed in a file name and short enough to fit.

    Characters that are not allowed in a path component on the current
    operating system are removed.  If ``base_path + caption + suffix`` would
    then exceed the maximum path length, the caption is cut at a word
    boundary (whitespace runs are collapsed to single spaces) and a notice
    is written to ``sys.stderr``.

    Args:
        base_path: Path prefix that precedes the caption in the final file
            name (directory plus index part).
        original_caption: Caption text to sanitize.
        suffix: Text that follows the caption, e.g. the extension ``".png"``.

    Returns:
        The sanitized caption.  It is an empty string when not even the
        first word fits into the remaining path budget.
    """
    if platform.system().lower() == "windows":
        invalid_path_characters = "\\/:*?\"<>|"
        max_path_length = 259
    else:
        # linux/macos: only the path separator is stripped.
        # NOTE(review): 1023 is presumably PATH_MAX - 1 on macOS; most file
        # systems additionally cap a single file name at 255 bytes, which is
        # not checked here -- confirm whether that matters for callers.
        invalid_path_characters = "/"
        max_path_length = 1023

    # One pass over the string instead of one .replace() per character.
    caption = original_caption.translate(
        str.maketrans("", "", invalid_path_characters)
    )

    # Number of characters left for the caption; may be negative when the
    # fixed part of the path is already too long on its own.
    budget = max_path_length - len(base_path) - len(suffix)
    if len(caption) <= budget:
        return caption

    # Keep as many whole words as fit.  Every word that fits is kept,
    # including the last one, and an empty token list simply yields "".
    kept_tokens = []
    used = 0
    for token in caption.split():
        needed = len(token) + (1 if kept_tokens else 0)  # +1 for the space
        if used + needed > budget:
            break
        kept_tokens.append(token)
        used += needed

    truncated = " ".join(kept_tokens)
    print(f"\nPath will be too long. Truncated caption: {original_caption}\nto: {truncated}", file=sys.stderr)
    return truncated