| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | import argparse |
| | import glob |
| | import json |
| | import logging |
| | import os |
| |
|
| | from dataset import consep_nuclei_dataset |
| |
|
logger = logging.getLogger(__name__)


def main():
    """Pre-process the CoNSeP dataset and write a ``dataset.json`` index.

    Command-line options:
        --input / -i      Directory holding the extracted CoNSeP dataset.
        --output / -o     Directory that receives the pre-processed data.
        --crop_size / -s  Crop size forwarded to ``consep_nuclei_dataset``.
        --limit / -n      Record limit forwarded to ``consep_nuclei_dataset``
                          (the CLI help describes 0 as "no limit").

    For each of the ``Train`` and ``Test`` sub-folders found under the input
    directory, every ``Images/*.png`` file is paired with the ``Labels/*.mat``
    file that has the same base name, and the pairs are handed to
    ``consep_nuclei_dataset``. Whatever that call returns is stored under the
    ``"training"`` / ``"validation"`` key of the JSON document written to
    ``<output>/dataset.json``. Sub-folders that do not exist are skipped with
    a warning.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="[%(asctime)s] [%(process)s] [%(threadName)s] [%(levelname)s] (%(name)s:%(lineno)d) - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        force=True,
    )

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input",
        "-i",
        type=str,
        default=r"/workspace/data/CoNSeP",
        help="Input/Downloaded/Extracted dir for CoNSeP Dataset",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=str,
        default=r"/workspace/data/CoNSePNuclei",
        help="Output dir to store pre-processed data",
    )
    parser.add_argument("--crop_size", "-s", type=int, default=128, help="Crop size for each Nuclei")
    parser.add_argument("--limit", "-n", type=int, default=0, help="Non-zero value to limit processing max records")

    args = parser.parse_args()
    dataset_json = {}
    for split, section in {"Train": "training", "Test": "validation"}.items():
        logger.info("---------------------------------------------------------------------------------")
        split_dir = os.path.join(args.input, split)
        if not os.path.exists(split_dir):
            logger.warning("Ignore %s (NOT Exists in Input Folder)", split)
            continue

        logger.info("Processing Images/labels for: %s", split)
        images = sorted(glob.glob(os.path.join(split_dir, "Images", "*.png")))
        labels = sorted(glob.glob(os.path.join(split_dir, "Labels", "*.mat")))

        # Pair by base name instead of by position: with a positional zip, one
        # missing or extra file would silently shift every later pair.
        # NOTE(review): assumes an image and its label share the same file stem
        # (e.g. train_1.png / train_1.mat) - confirm against the dataset layout.
        labels_by_stem = {os.path.splitext(os.path.basename(path))[0]: path for path in labels}
        ds = []
        for image in images:
            stem = os.path.splitext(os.path.basename(image))[0]
            label = labels_by_stem.pop(stem, None)
            if label is None:
                logger.warning("Skipping image without a matching label: %s", image)
                continue
            ds.append({"image": image, "label": label})
        for orphan in labels_by_stem.values():
            logger.warning("Skipping label without a matching image: %s", orphan)

        # An empty --output falls back to a directory named after the split.
        output_dir = os.path.join(args.output, split) if args.output else split

        ds_new = consep_nuclei_dataset(ds, output_dir, args.crop_size, limit=args.limit)
        logger.info("Total Generated/Extended Records: %d => %d", len(ds), len(ds_new))

        dataset_json[section] = ds_new

    # The output directory may not exist yet (e.g. when every split was
    # skipped), so create it before writing the index file.
    if args.output:
        os.makedirs(args.output, exist_ok=True)

    ds_file = os.path.join(args.output, "dataset.json")
    with open(ds_file, "w", encoding="utf-8") as fp:
        json.dump(dataset_json, fp, indent=2)
    logger.info("Dataset JSON Generated at: %s", ds_file)
| |
|
| |
|
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
| |
|