# Skip to content
# Snippets Groups Projects
# Commit fa9883e4 authored by Jérôme Botoko Ekila
# Browse files
#
# fix: create cogent dataset
#
# parent 439cae36
# No related branches found
# No related tags found
# No related merge requests found
import argparse
import json
import logging
import os
import shutil
import cv2 as cv
import numpy as np
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from scipy.spatial import distance as d
from tqdm import tqdm
from nmn.datasets.generate.utils.masks import calculate_mid_mask, convert_array_to_rle
from nmn.utils.config import load_json, load_module_config
from nmn.utils.dotdict import DotDict
# Command-line interface of this script; parsed in main().
parser = argparse.ArgumentParser()
# Opt-in flag (False unless given): main() forwards it to generate_masks(),
# where it selects "cuda:0" instead of "cpu" for the detector.
parser.add_argument("--use_cuda", action="store_true")
def split_data(n_train: int = 60000) -> None:
    """Split the trainA scenes into a training part and a held-out validation part.

    The directory ``data/cogent/scenes`` is first renamed to
    ``data/cogent/raw_scenes``. The split is positional, not random: the
    first ``n_train`` trainA scenes stay in ``trainA`` and the remaining ones
    become ``valA``. The scenes read from the ``valA`` and ``valB`` files are
    written out as ``testA`` and ``testB``. Each split is stored as
    ``data/cogent/raw_scenes/CLEVR_{dataset}_splitscenes.json``.

    Args:
        n_train: Number of leading trainA scenes kept for training. The
            default (60000) matches the default image range that
            ``move_images`` moves into the valA image folder; keep the two
            in sync when changing it.

    Raises:
        ValueError: If ``n_train`` is negative.
        OSError: If ``data/cogent/scenes`` cannot be renamed (for example
            because it does not exist).
    """
    if n_train < 0:
        raise ValueError(f"n_train must be non-negative, got {n_train}")

    logging.info("Reading datasets...")
    # After this rename the raw_scenes directory is guaranteed to exist, so
    # no additional makedirs call is required before writing into it.
    os.rename("data/cogent/scenes", "data/cogent/raw_scenes")

    train_a = load_json("data/cogent/raw_scenes/CLEVR_trainA_scenes.json")
    test_a = load_json("data/cogent/raw_scenes/CLEVR_valA_scenes.json")
    test_b = load_json("data/cogent/raw_scenes/CLEVR_valB_scenes.json")

    # Plain list slicing; the scenes are written back unchanged.
    all_train_scenes = list(train_a["scenes"])
    splits = {
        "trainA": all_train_scenes[:n_train],
        "valA": all_train_scenes[n_train:],
        "testA": list(test_a["scenes"]),
        "testB": list(test_b["scenes"]),
    }

    for dataset, split_scenes in splits.items():
        payload = {
            "info": f"COGENT {dataset} dataset annotated with object mask in coco rle format",
            "scenes": split_scenes,
        }
        with open(f"data/cogent/raw_scenes/CLEVR_{dataset}_splitscenes.json", "w") as f:
            logging.info(f"Writing {dataset} set...")
            json.dump(payload, f)
def move_images(val_start: int = 60000, val_stop: int = 70000) -> None:
    """Rearrange the image folders so they match the train/val/test scene splits.

    Under ``data/cogent/images`` the folders are renamed as follows:
    ``testA`` -> ``unusedA``, ``testB`` -> ``unusedB``, ``valA`` -> ``testA``
    and ``valB`` -> ``testB``. A fresh ``valA`` folder is then created and
    the trainA images with index in ``range(val_start, val_stop)`` are moved
    into it. The moved files keep their ``CLEVR_trainA_`` file names.

    Args:
        val_start: Index of the first trainA image moved to ``valA``
            (inclusive).
        val_stop: Index one past the last trainA image moved to ``valA``
            (exclusive).

    Raises:
        OSError: If one of the folders cannot be renamed or an image in the
            requested range cannot be moved.
    """
    images_dir = "data/cogent/images"

    # Order matters: the old test folders must be moved out of the way
    # before the val folders can take over their names.
    renames = (
        ("testA", "unusedA"),
        ("testB", "unusedB"),
        ("valA", "testA"),
        ("valB", "testB"),
    )
    for old_name, new_name in renames:
        os.rename(f"{images_dir}/{old_name}", f"{images_dir}/{new_name}")

    os.makedirs(f"{images_dir}/valA", exist_ok=True)
    for i in range(val_start, val_stop):
        filename = f"CLEVR_trainA_{i:06d}.png"
        shutil.move(f"{images_dir}/trainA/{filename}", f"{images_dir}/valA/{filename}")
# Maps the class ids predicted by the detector (pred_classes) to shape names;
# used by generate_masks() to label every predicted mask.
CATEGORY_TO_SHAPE = {0: "sphere", 1: "cylinder", 2: "cube"}
def load_detectron(use_cuda: bool, module_info: DotDict) -> DefaultPredictor:
    """Create a Detectron2 predictor from the module configuration.

    Args:
        use_cuda: Run the model on ``cuda:0`` when true, otherwise on the CPU.
        module_info: Module settings; this function reads
            ``architecture_path``, ``weights``, ``mask_format``,
            ``n_classes`` and ``threshold`` from it.

    Returns:
        A ``DefaultPredictor`` built from the resulting configuration.
    """
    config = get_cfg()

    # Start from the model-zoo architecture config, then override below.
    architecture_file = model_zoo.get_config_file(module_info["architecture_path"])
    config.merge_from_file(architecture_file)

    # Dataset and data-loading settings.
    config.DATASETS.TRAIN = ("clevr_mini_train",)
    config.DATASETS.TEST = ()
    config.DATALOADER.NUM_WORKERS = 0

    # Model settings.
    if use_cuda:
        device = "cuda:0"
    else:
        device = "cpu"
    config.MODEL.DEVICE = device
    config.MODEL.WEIGHTS = module_info.weights
    config.INPUT.MASK_FORMAT = module_info.mask_format
    config.MODEL.ROI_HEADS.NUM_CLASSES = module_info.n_classes
    config.MODEL.ROI_HEADS.SCORE_THRESH_TEST = module_info.threshold

    predictor = DefaultPredictor(config)
    return predictor
def load_img(dataset: str, filename: str) -> np.ndarray:
    """Read one image of the given dataset split from disk.

    Args:
        dataset: Name of the image sub-folder under ``data/cogent/images``.
        filename: File name of the image inside that folder.

    Returns:
        The decoded image after the ``cv.COLOR_RGBA2RGB`` conversion.

    Raises:
        FileNotFoundError: If OpenCV could not read the file (missing file
            or unreadable/unsupported image data).
    """
    img_path = os.path.join(f"data/cogent/images/{dataset}", filename)
    img = cv.imread(img_path)
    # cv.imread does not raise on failure; it returns None. Fail here with
    # the offending path instead of an opaque error inside cvtColor.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # NOTE(review): cv.imread with default flags should already return a
    # 3-channel image, so this conversion looks like a plain copy - confirm
    # which channel order the predictor expects before changing it.
    return cv.cvtColor(img, cv.COLOR_RGBA2RGB)
def generate_masks(use_cuda: bool, module_info: DotDict):
    """Run the detector on every image and store the predicted object masks.

    For each of the ``trainA``, ``valA`` and ``testA`` splits, the scenes are
    read from ``CLEVR_{dataset}_splitscenes.json``, the detector is applied
    to each scene image, and three parallel lists (image index, RLE mask,
    shape name - one entry per predicted instance) are written to
    ``data/cogent/raw_scenes/CLEVR_{dataset}_masks.json``.

    Args:
        use_cuda: Forwarded to ``load_detectron`` to select the device.
        module_info: Detector settings, forwarded to ``load_detectron``.
    """
    logging.info("Loading Detectron...")
    predictor: DefaultPredictor = load_detectron(use_cuda, module_info)

    for dataset in ["trainA", "valA", "testA"]:
        logging.info("Loading scenes...")
        scenes_path = f"data/cogent/raw_scenes/CLEVR_{dataset}_splitscenes.json"
        scenes = load_json(scenes_path)["scenes"]

        # Parallel lists: entry k of each list describes the same instance.
        image_idxs = []
        object_masks = []
        shapes = []

        logging.info("Processing scenes...")
        for scene in tqdm(scenes):
            image: np.ndarray = load_img(dataset, scene["image_filename"])
            instances = detectron_output["instances"] if False else predictor(image)["instances"]

            # Translate predicted class ids into shape names.
            class_ids = instances.pred_classes.cpu().numpy()
            shape_names = [CATEGORY_TO_SHAPE[class_id] for class_id in class_ids]

            for pred_mask, shape_name in zip(instances.pred_masks, shape_names):
                rle_mask = convert_array_to_rle(pred_mask.cpu().numpy())
                image_idxs.append(scene["image_index"])
                object_masks.append(rle_mask)
                shapes.append(shape_name)
        logging.info("Done processing scenes")

        # Intermediate result, read back by match_masks().
        output = {
            "image_idxs": image_idxs,
            "object_masks": object_masks,
            "shapes": shapes,
        }
        with open(f"data/cogent/raw_scenes/CLEVR_{dataset}_masks.json", "w") as f:
            json.dump(output, f)
def _assign_closest_masks(objects, rle_masks, mask_coords) -> None:
    """Greedily give each object the nearest mask that is not yet taken.

    Objects are processed in order; each one receives, under the key
    ``"mask"``, the still-unassigned mask whose centre is closest to the
    first two entries of the object's ``pixel_coords``.
    """
    taken = set()
    for obj in objects:
        obj_coord = obj["pixel_coords"][0:2]
        distances = [d.euclidean(obj_coord, mask_coord) for mask_coord in mask_coords]
        # sorted() is stable, so equally distant masks keep their original order.
        for pos in sorted(range(len(distances)), key=distances.__getitem__):
            if pos not in taken:
                taken.add(pos)
                obj["mask"] = rle_masks[pos]
                break


def match_masks() -> None:
    """Match the objects of every scene to the predicted masks.

    For each of the ``trainA``, ``valA`` and ``testA`` splits, the masks
    written by ``generate_masks`` are combined with the split scenes and the
    result is stored in ``data/cogent/scenes/{dataset}.json``.

    A scene is only kept when the number of predicted masks equals the
    number of objects in the scene; all other scenes are left out of the
    output. Within a kept scene the matching is greedy (nearest free mask
    centre per object, in object order), so it is not watertight: when the
    centres of two objects are very close, masks can be paired with the
    wrong object.
    """
    os.makedirs("data/cogent/scenes", exist_ok=True)
    for dataset in ["trainA", "valA", "testA"]:
        logging.info("Loading masks...")
        masks = load_json(f"data/cogent/raw_scenes/CLEVR_{dataset}_masks.json")
        logging.info("Loading CLEVR data...")
        data = load_json(f"data/cogent/raw_scenes/CLEVR_{dataset}_splitscenes.json")
        scenes = data["scenes"]

        # Group the masks by image index once, instead of rescanning the
        # complete mask list for every scene.
        masks_by_image = {}
        for img_idx, rle_mask in zip(masks["image_idxs"], masks["object_masks"]):
            masks_by_image.setdefault(img_idx, []).append(rle_mask)

        logging.info("Matching masks to objects...")
        annotated_scenes = []
        n_skipped = 0
        for scene in tqdm(scenes):
            rle_masks = masks_by_image.get(scene["image_index"], [])
            if len(rle_masks) != len(scene["objects"]):
                # The detector found too few or too many objects in this image.
                n_skipped += 1
                continue
            # Middle coordinate of every mask, compared to the object positions.
            mask_coords = [calculate_mid_mask(m) for m in rle_masks]
            _assign_closest_masks(scene["objects"], rle_masks, mask_coords)
            annotated_scenes.append(scene)
        logging.info(f"Skipped {n_skipped} scenes with a mask/object count mismatch")

        annotated_data = {
            "info": f"COGENT dataset {dataset} annotated with object mask in coco rle format",
            "scenes": annotated_scenes,
        }
        with open(f"data/cogent/scenes/{dataset}.json", "w") as f:
            json.dump(annotated_data, f)
def main() -> None:
    """Parse the command line, load the detector settings and run the pipeline.

    Only the mask-generation stage is currently enabled; the other stages
    are kept below as disabled calls.
    """
    args = parser.parse_args()

    # Fixed settings consumed by load_module_config(); presumably that call
    # is what populates args.module_info, which is read below - verify
    # against nmn.utils.config.
    args.experiment = "clevr"
    args.module_config = "detectron.json"
    load_module_config(args)

    # Pipeline stages in their intended order; the disabled ones are
    # left commented out as in the original.
    # split_data()
    # move_images()
    generate_masks(args.use_cuda, args.module_info)
    # match_masks()


if __name__ == "__main__":
    main()
# 0% Loading or .
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# Please register or sign in to comment