Commit c81d440d authored by AUTOMATIC

moved deepdanbooru to pure pytorch implementation

parent 47a44c7e
README.md
@@ -70,7 +70,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
 - separate prompts using uppercase `AND`
 - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
 - No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
-- DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args)
+- DeepDanbooru integration, creates danbooru style tags for anime prompts
 - [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args)
 - via extension: [History tab](https://github.com/yfszzx/stable-diffusion-webui-images-browser): view, direct and delete images conveniently within the UI
 - Generate forever option
launch.py
@@ -134,7 +134,6 @@ def prepare_enviroment():
     gfpgan_package = os.environ.get('GFPGAN_PACKAGE', "git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379")
     clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1")
-    deepdanbooru_package = os.environ.get('DEEPDANBOORU_PACKAGE', "git+https://github.com/KichangKim/DeepDanbooru.git@d91a2963bf87c6a770d74894667e9ffa9f6de7ff")
     xformers_windows_package = os.environ.get('XFORMERS_WINDOWS_PACKAGE', 'https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl')
@@ -158,7 +157,6 @@ def prepare_enviroment():
     sys.argv, update_check = extract_arg(sys.argv, '--update-check')
     sys.argv, run_tests = extract_arg(sys.argv, '--tests')
     xformers = '--xformers' in sys.argv
-    deepdanbooru = '--deepdanbooru' in sys.argv
     ngrok = '--ngrok' in sys.argv

     try:
@@ -193,9 +191,6 @@ def prepare_enviroment():
         elif platform.system() == "Linux":
             run_pip("install xformers", "xformers")

-    if not is_installed("deepdanbooru") and deepdanbooru:
-        run_pip(f"install {deepdanbooru_package}#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru")
-
     if not is_installed("pyngrok") and ngrok:
         run_pip("install pyngrok", "ngrok")
modules/api/api.py
@@ -9,7 +9,7 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials
 from secrets import compare_digest

 import modules.shared as shared
-from modules import sd_samplers
+from modules import sd_samplers, deepbooru
 from modules.api.models import *
 from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
 from modules.extras import run_extras, run_pnginfo
@@ -18,9 +18,6 @@ from modules.sd_models import checkpoints_list
 from modules.realesrgan_model import get_realesrgan_models
 from typing import List

-if shared.cmd_opts.deepdanbooru:
-    from modules.deepbooru import get_deepbooru_tags
-
 def upscaler_to_index(name: str):
     try:
         return [x.name.lower() for x in shared.sd_upscalers].index(name.lower())
@@ -245,10 +242,7 @@ class Api:
         if interrogatereq.model == "clip":
             processed = shared.interrogator.interrogate(img)
         elif interrogatereq.model == "deepdanbooru":
-            if shared.cmd_opts.deepdanbooru:
-                processed = get_deepbooru_tags(img)
-            else:
-                raise HTTPException(status_code=404, detail="Model not found. Add --deepdanbooru when launching for using the model.")
+            processed = deepbooru.model.tag(img)
         else:
             raise HTTPException(status_code=404, detail="Model not found")
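Since the endpoint no longer depends on a launch flag, any API client can request DeepDanbooru tags directly. A minimal client sketch, assuming the interrogate route is mounted at `/sdapi/v1/interrogate` on a default local install and that the request model takes a base64 image plus a model name (check `modules/api/models.py` for the exact schema):

```python
# Hypothetical client for the interrogate endpoint; the host, port,
# and field names are assumptions to verify against your install.
import base64
import requests

with open("example.png", "rb") as f:  # hypothetical input image
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = requests.post(
    "http://127.0.0.1:7860/sdapi/v1/interrogate",
    json={"image": image_b64, "model": "deepdanbooru"},
)
response.raise_for_status()
print(response.json())  # expected shape: {"caption": "1girl, solo, ..."}
```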
modules/deepbooru.py
-import os.path
-from concurrent.futures import ProcessPoolExecutor
-import multiprocessing
-import time
-import re
-
-re_special = re.compile(r'([\\()])')
-
-def get_deepbooru_tags(pil_image):
-    """
-    This method is for running only one image at a time for simple use. Used to the img2img interrogate.
-    """
-    from modules import shared  # prevents circular reference
-
-    try:
-        create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, create_deepbooru_opts())
-        return get_tags_from_process(pil_image)
-    finally:
-        release_process()
-
-
-OPT_INCLUDE_RANKS = "include_ranks"
-def create_deepbooru_opts():
-    from modules import shared
-
-    return {
-        "use_spaces": shared.opts.deepbooru_use_spaces,
-        "use_escape": shared.opts.deepbooru_escape,
-        "alpha_sort": shared.opts.deepbooru_sort_alpha,
-        OPT_INCLUDE_RANKS: shared.opts.interrogate_return_ranks,
-    }
-
-
-def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts):
-    model, tags = get_deepbooru_tags_model()
-    while True:  # while process is running, keep monitoring queue for new image
-        pil_image = queue.get()
-        if pil_image == "QUIT":
-            break
-        else:
-            deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts)
-
-
-def create_deepbooru_process(threshold, deepbooru_opts):
-    """
-    Creates deepbooru process. A queue is created to send images into the process. This enables multiple images
-    to be processed in a row without reloading the model or creating a new process. To return the data, a shared
-    dictionary is created to hold the tags created. To wait for tags to be returned, a value of -1 is assigned
-    to the dictionary and the method adding the image to the queue should wait for this value to be updated with
-    the tags.
-    """
-    from modules import shared  # prevents circular reference
-    context = multiprocessing.get_context("spawn")
-    shared.deepbooru_process_manager = context.Manager()
-    shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue()
-    shared.deepbooru_process_return = shared.deepbooru_process_manager.dict()
-    shared.deepbooru_process_return["value"] = -1
-    shared.deepbooru_process = context.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts))
-    shared.deepbooru_process.start()
-
-
-def get_tags_from_process(image):
-    from modules import shared
-
-    shared.deepbooru_process_return["value"] = -1
-    shared.deepbooru_process_queue.put(image)
-    while shared.deepbooru_process_return["value"] == -1:
-        time.sleep(0.2)
-    caption = shared.deepbooru_process_return["value"]
-    shared.deepbooru_process_return["value"] = -1
-
-    return caption
-
-
-def release_process():
-    """
-    Stops the deepbooru process to return used memory
-    """
-    from modules import shared  # prevents circular reference
-    shared.deepbooru_process_queue.put("QUIT")
-    shared.deepbooru_process.join()
-    shared.deepbooru_process_queue = None
-    shared.deepbooru_process = None
-    shared.deepbooru_process_return = None
-    shared.deepbooru_process_manager = None
-
-
-def get_deepbooru_tags_model():
-    import deepdanbooru as dd
-    import tensorflow as tf
-    import numpy as np
-
-    this_folder = os.path.dirname(__file__)
-    model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru'))
-    if not os.path.exists(os.path.join(model_path, 'project.json')):
-        # there is no point importing these every time
-        import zipfile
-        from basicsr.utils.download_util import load_file_from_url
-        load_file_from_url(
-            r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
-            model_path)
-        with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref:
-            zip_ref.extractall(model_path)
-        os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"))
-
-    tags = dd.project.load_tags_from_project(model_path)
-    model = dd.project.load_model_from_project(model_path, compile_model=False)
-    return model, tags
-
-
-def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts):
-    import deepdanbooru as dd
-    import tensorflow as tf
-    import numpy as np
-
-    alpha_sort = deepbooru_opts['alpha_sort']
-    use_spaces = deepbooru_opts['use_spaces']
-    use_escape = deepbooru_opts['use_escape']
-    include_ranks = deepbooru_opts['include_ranks']
-
-    width = model.input_shape[2]
-    height = model.input_shape[1]
-    image = np.array(pil_image)
-    image = tf.image.resize(
-        image,
-        size=(height, width),
-        method=tf.image.ResizeMethod.AREA,
-        preserve_aspect_ratio=True,
-    )
-    image = image.numpy()  # EagerTensor to np.array
-    image = dd.image.transform_and_pad_image(image, width, height)
-    image = image / 255.0
-    image_shape = image.shape
-    image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))
-
-    y = model.predict(image)[0]
-
-    result_dict = {}
-    for i, tag in enumerate(tags):
-        result_dict[tag] = y[i]
-
-    unsorted_tags_in_theshold = []
-    result_tags_print = []
-    for tag in tags:
-        if result_dict[tag] >= threshold:
-            if tag.startswith("rating:"):
-                continue
-            unsorted_tags_in_theshold.append((result_dict[tag], tag))
-            result_tags_print.append(f'{result_dict[tag]} {tag}')
-
-    # sort tags
-    result_tags_out = []
-    sort_ndx = 0
-    if alpha_sort:
-        sort_ndx = 1
-
-    # sort by reverse by likelihood and normal for alpha, and format tag text as requested
-    unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort))
-    for weight, tag in unsorted_tags_in_theshold:
-        tag_outformat = tag
-        if use_spaces:
-            tag_outformat = tag_outformat.replace('_', ' ')
-        if use_escape:
-            tag_outformat = re.sub(re_special, r'\\\1', tag_outformat)
-        if include_ranks:
-            tag_outformat = f"({tag_outformat}:{weight:.3f})"
-
-        result_tags_out.append(tag_outformat)
-
-    print('\n'.join(sorted(result_tags_print, reverse=True)))
-
-    return ', '.join(result_tags_out)
+import os
+import re
+
+import torch
+from PIL import Image
+import numpy as np
+
+from modules import modelloader, paths, deepbooru_model, devices, images, shared
+
+re_special = re.compile(r'([\\()])')
+
+
+class DeepDanbooru:
+    def __init__(self):
+        self.model = None
+
+    def load(self):
+        if self.model is not None:
+            return
+
+        files = modelloader.load_models(
+            model_path=os.path.join(paths.models_path, "torch_deepdanbooru"),
+            model_url='https://github.com/AUTOMATIC1111/TorchDeepDanbooru/releases/download/v1/model-resnet_custom_v3.pt',
+            ext_filter=".pt",
+            download_name='model-resnet_custom_v3.pt',
+        )
+
+        self.model = deepbooru_model.DeepDanbooruModel()
+        self.model.load_state_dict(torch.load(files[0], map_location="cpu"))
+
+        self.model.eval()
+        self.model.to(devices.cpu, devices.dtype)
+
+    def start(self):
+        self.load()
+        self.model.to(devices.device)
+
+    def stop(self):
+        if not shared.opts.interrogate_keep_models_in_memory:
+            self.model.to(devices.cpu)
+            devices.torch_gc()
+
+    def tag(self, pil_image):
+        self.start()
+        res = self.tag_multi(pil_image)
+        self.stop()
+
+        return res
+
+    def tag_multi(self, pil_image, force_disable_ranks=False):
+        threshold = shared.opts.interrogate_deepbooru_score_threshold
+        use_spaces = shared.opts.deepbooru_use_spaces
+        use_escape = shared.opts.deepbooru_escape
+        alpha_sort = shared.opts.deepbooru_sort_alpha
+        include_ranks = shared.opts.interrogate_return_ranks and not force_disable_ranks
+
+        pic = images.resize_image(2, pil_image.convert("RGB"), 512, 512)
+        a = np.expand_dims(np.array(pic, dtype=np.float32), 0) / 255
+
+        with torch.no_grad(), devices.autocast():
+            x = torch.from_numpy(a).cuda()
+            y = self.model(x)[0].detach().cpu().numpy()
+
+        probability_dict = {}
+
+        for tag, probability in zip(self.model.tags, y):
+            if probability < threshold:
+                continue
+
+            if tag.startswith("rating:"):
+                continue
+
+            probability_dict[tag] = probability
+
+        if alpha_sort:
+            tags = sorted(probability_dict)
+        else:
+            tags = [tag for tag, _ in sorted(probability_dict.items(), key=lambda x: -x[1])]
+
+        res = []
+
+        for tag in tags:
+            probability = probability_dict[tag]
+            tag_outformat = tag
+            if use_spaces:
+                tag_outformat = tag_outformat.replace('_', ' ')
+            if use_escape:
+                tag_outformat = re.sub(re_special, r'\\\1', tag_outformat)
+            if include_ranks:
+                tag_outformat = f"({tag_outformat}:{probability:.3f})"
+
+            res.append(tag_outformat)
+
+        return ", ".join(res)
+
+
+model = DeepDanbooru()
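The option handling at the end of `tag_multi` is easiest to see with a worked example. This standalone sketch applies the three formatting options in the same order the method does; the tag name and score are made up:

```python
import re

# Same escape pattern as modules/deepbooru.py.
re_special = re.compile(r'([\\()])')

tag, probability = "holding_(object)", 0.987  # made-up tag and score

out = tag
out = out.replace('_', ' ')             # use_spaces: underscores -> spaces
out = re.sub(re_special, r'\\\1', out)  # use_escape: backslash-escape \ ( )
out = f"({out}:{probability:.3f})"      # include_ranks: append the score

print(out)  # (holding \(object\):0.987)
```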
modules/deepbooru_model.py (new file; this diff is collapsed)
modules/shared.py
@@ -55,7 +55,7 @@ parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with
 parser.add_argument("--clip-models-path", type=str, help="Path to directory with CLIP model file(s).", default=None)
 parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers")
 parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
-parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator")
+parser.add_argument("--deepdanbooru", action='store_true', help="does not do anything")
 parser.add_argument("--opt-split-attention", action='store_true', help="force-enables Doggettx's cross-attention layer optimization. By default, it's on for torch cuda.")
 parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.")
 parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
modules/textual_inversion/preprocess.py
@@ -6,12 +6,10 @@ import sys
 import tqdm
 import time

-from modules import shared, images
+from modules import shared, images, deepbooru
 from modules.paths import models_path
 from modules.shared import opts, cmd_opts
 from modules.textual_inversion import autocrop

-if cmd_opts.deepdanbooru:
-    import modules.deepbooru as deepbooru
-

 def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False):
@@ -20,9 +18,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce
     shared.interrogator.load()

     if process_caption_deepbooru:
-        db_opts = deepbooru.create_deepbooru_opts()
-        db_opts[deepbooru.OPT_INCLUDE_RANKS] = False
-        deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts)
+        deepbooru.model.start()

     preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_focal_crop, process_focal_crop_face_weight, process_focal_crop_entropy_weight, process_focal_crop_edges_weight, process_focal_crop_debug)
@@ -32,7 +28,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce
         shared.interrogator.send_blip_to_ram()

     if process_caption_deepbooru:
-        deepbooru.release_process()
+        deepbooru.model.stop()


 def listfiles(dirname):
@@ -58,7 +54,7 @@ def save_pic_with_caption(image, index, params: PreprocessParams, existing_capti
     if params.process_caption_deepbooru:
         if len(caption) > 0:
             caption += ", "
-        caption += deepbooru.get_tags_from_process(image)
+        caption += deepbooru.model.tag_multi(image)

     filename_part = params.src
     filename_part = os.path.splitext(filename_part)[0]
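The `start()`/`tag_multi()`/`stop()` calls above replace the old process-based helpers: the model is loaded and moved to the GPU once for the whole preprocessing run rather than per image. A minimal sketch of the same lifecycle from caller code, assuming it runs inside the webui process where `modules.deepbooru` is importable (filenames are hypothetical):

```python
from PIL import Image

from modules import deepbooru

# One-off use: tag() wraps start()/tag_multi()/stop() itself.
print(deepbooru.model.tag(Image.open("example.png")))  # hypothetical file

# Batch use, as in preprocess(): load once, tag many, then release.
# stop() moves the model back to the CPU unless the
# interrogate_keep_models_in_memory option is set.
deepbooru.model.start()
try:
    for path in ["a.png", "b.png"]:  # hypothetical files
        print(path, deepbooru.model.tag_multi(Image.open(path)))
finally:
    deepbooru.model.stop()
```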
modules/ui.py
@@ -19,14 +19,11 @@ import numpy as np
 from PIL import Image, PngImagePlugin

-from modules import sd_hijack, sd_models, localization, script_callbacks, ui_extensions
+from modules import sd_hijack, sd_models, localization, script_callbacks, ui_extensions, deepbooru
 from modules.paths import script_path

 from modules.shared import opts, cmd_opts, restricted_opts

-if cmd_opts.deepdanbooru:
-    from modules.deepbooru import get_deepbooru_tags
-
 import modules.codeformer_model
 import modules.generation_parameters_copypaste as parameters_copypaste
 import modules.gfpgan_model
@@ -352,7 +349,7 @@ def interrogate(image):
 def interrogate_deepbooru(image):
-    prompt = get_deepbooru_tags(image)
+    prompt = deepbooru.model.tag(image)
     return gr_show(True) if prompt is None else prompt