Commit 7b770017 authored by novelailab's avatar novelailab

Revert open() changes, fix and switch to build()

parent 8a038aec
...@@ -199,8 +199,8 @@ class ImageDatasetBuilder(): ...@@ -199,8 +199,8 @@ class ImageDatasetBuilder():
self.folder_path.mkdir(parents=True, exist_ok=True) self.folder_path.mkdir(parents=True, exist_ok=True)
if self.open_dataset: if self.open_dataset:
dataset = open(self.dataset_path, mode="ab+") self.dataset = open(self.dataset_path, mode="ab+")
dataset.flush() self.dataset.flush()
if self.open_index: if self.open_index:
self.index = [] self.index = []
...@@ -218,31 +218,22 @@ class ImageDatasetBuilder(): ...@@ -218,31 +218,22 @@ class ImageDatasetBuilder():
self.flush_metadata(silent=True) self.flush_metadata(silent=True)
print("Dataset closed and flushed.") print("Dataset closed and flushed.")
append_mode = False
if self.open_dataset and self.dataset_path.is_file(): if self.open_dataset and self.dataset_path.is_file():
self.dataset = open(self.dataset_path, mode="ab+") self.dataset = open(self.dataset_path, mode="ab+")
append_mode = True
elif self.open_dataset:
self.dataset = open(self.dataset_path, mode="wb")
else: else:
raise Exception("Dataset file not found at {}".format(self.dataset_path)) raise Exception("Dataset file not found at {}".format(self.dataset_path))
if self.open_index and self.index_path.is_file(): if self.open_index and self.index_path.is_file():
with open(self.index_path, 'rb') as f: with open(self.index_path, 'rb') as f:
self.index = pickle.load(f) self.index = pickle.load(f)
elif append_mode:
raise Exception("Index file not found at {}".format(self.index_path))
else: else:
self.index = [] raise Exception("Index file not found at {}".format(self.index_path))
if self.open_metadata and self.metadata_path.is_file(): if self.open_metadata and self.metadata_path.is_file():
with open(self.metadata_path, 'rb') as f: with open(self.metadata_path, 'rb') as f:
self.metadata = pickle.load(f) self.metadata = pickle.load(f)
elif append_mode:
raise Exception("Metadata file not found at {}".format(self.metadata_path))
else: else:
self.metadata = {} raise Exception("Metadata file not found at {}".format(self.metadata_path))
def operate(self, operation, batch, identities, metadata=None, executor=concurrent.futures.ThreadPoolExecutor, use_tqdm=False, **kwargs): def operate(self, operation, batch, identities, metadata=None, executor=concurrent.futures.ThreadPoolExecutor, use_tqdm=False, **kwargs):
executor = executor(max_workers=self.threads) executor = executor(max_workers=self.threads)
......
...@@ -60,7 +60,7 @@ print("Copyng old db data...") ...@@ -60,7 +60,7 @@ print("Copyng old db data...")
# detect block size of fs the archive is stored on # detect block size of fs the archive is stored on
block_size = int(os.popen("getconf PAGE_SIZE").read().lstrip().rstrip()) #int(os.popen("stat -fc %s " + new_dataset_folder).read().lstrip().rstrip()) block_size = int(os.popen("getconf PAGE_SIZE").read().lstrip().rstrip()) #int(os.popen("stat -fc %s " + new_dataset_folder).read().lstrip().rstrip())
new_dataset = ImageDatasetBuilder(folder_path=new_dataset_folder, name="danbooru_updated", threads=32, block_size=block_size, align_fs_blocks=True) new_dataset = ImageDatasetBuilder(folder_path=new_dataset_folder, name="danbooru_updated", threads=32, block_size=block_size, align_fs_blocks=True)
new_dataset.open() new_dataset.build()
# how many operations to run at once # how many operations to run at once
copy_chunk_size = 4096 copy_chunk_size = 4096
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment