-
Notifications
You must be signed in to change notification settings - Fork 1
/
dynamic_mask_generation.py
247 lines (219 loc) · 9.12 KB
/
dynamic_mask_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#!/usr/bin/env python3
# Copyright 2004-present Facebook. All Rights Reserved.
import argparse
import glob
import multiprocessing as mp
import os
import os.path as osp
import time
import cv2
import numpy as np
import tqdm
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from utils.predictor import VisualizationDemo
"""
- Pretrained weights (TODO(xrong): Replace with dropbox url):
manifold://compphoto_data/tree/pretrained/
first-party/mask-rcnn/mask_rcnn_R_50_FPN_3x.pkl
- Dynamic object categories in MSCOCO:
Person + Vehicle + Animal:
[person,
bicycle, car, motorcycle, airplane, bus, train, truck, boat, bird, cat,
dog, horse, sheep, cow, elephant, bear, zebra, giraffe]
A more detailed chart can be checked here:
https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
"""
# Default on-disk locations of the Mask R-CNN weights and its config file.
# NOTE(review): DEFAULT_MASK_RCNN_CONFIG_PATH is not referenced by the code
# visible in this file; the CLI default for --config_file is a different path.
DEFAULT_MASK_RCNN_MODEL_PATH = "models/mask_rcnn_R_50_FPN_3x.pkl"
DEFAULT_MASK_RCNN_CONFIG_PATH = "configs/mask_rcnn_R_50_FPN_3x.yaml"

# Title of the OpenCV preview window used when no output path is given.
WINDOW_NAME = "COCO detections"

# Predicted class ids that are masked out as "dynamic" objects:
# 0..7 and 13..22 (both ranges inclusive).
# NOTE(review): if the predictor reports detectron2's contiguous COCO ids
# (assumption - confirm), these ranges leave out "boat" (8) and "giraffe" (23)
# and include "bench" (13), which would not match the person/vehicle/animal
# list given in the notes at the top of this file.
DYNAMIC_OBJECT_CATEGORIES = [*range(8), *range(13, 23)]
def setup_cfg(args):
    """Build the detectron2 config for this run from parsed CLI arguments.

    Starts from ``get_cfg()``, merges ``args.config_file`` and then the
    ``args.opts`` overrides, and applies ``args.confidence_threshold`` to the
    RetinaNet, ROI-heads and panoptic-FPN score thresholds.

    The returned config is intentionally NOT frozen, so the caller can still
    merge further options before freezing it.
    """
    config = get_cfg()

    # File settings first, then command-line KEY VALUE overrides on top.
    config.merge_from_file(args.config_file)
    config.merge_from_list(args.opts)

    # One threshold drives every builtin model head's test-time score cutoff.
    threshold = args.confidence_threshold
    model = config.MODEL
    model.RETINANET.SCORE_THRESH_TEST = threshold
    model.ROI_HEADS.SCORE_THRESH_TEST = threshold
    model.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = threshold

    return config
def get_parser():
    """Create the command-line parser for the dynamic mask generation demo.

    Options cover the input source (``--input``, ``--webcam``,
    ``--video_input``), the output location (``--output``), the model config
    (``--config_file``, ``--opts``, ``--confidence_threshold``) and the mask
    post-processing (``--dilation_factor``, ``--save_anno``).

    Returns:
        argparse.ArgumentParser: the configured, not yet invoked parser.
    """
    cli = argparse.ArgumentParser(description="Dynamic mask generation CLI demo")
    add = cli.add_argument

    # --- model configuration -------------------------------------------
    add(
        "--config_file",
        metavar="FILE",
        default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml",
        help="path to config file",
    )

    # --- input sources -------------------------------------------------
    add("--webcam", action="store_true", help="Take inputs from webcam.")
    add("--video_input", help="Path to video file.")
    add(
        "--input",
        nargs="+",
        help=(
            "A list of space separated input images; "
            "or a single glob pattern such as 'directory/*.jpg'"
        ),
    )

    # --- output --------------------------------------------------------
    add(
        "--output",
        help=(
            "A file or directory to save output visualizations. "
            "If not given, will show output in an OpenCV window."
        ),
    )

    # --- detection / config overrides ----------------------------------
    add(
        "--confidence_threshold",
        default=0.5,
        type=float,
        help="Minimum score for instance predictions to be shown",
    )
    # REMAINDER: everything after --opts is collected verbatim as overrides.
    add(
        "--opts",
        nargs=argparse.REMAINDER,
        default=[],
        help="Modify config options using the command-line 'KEY VALUE' pairs",
    )

    # --- mask post-processing ------------------------------------------
    add(
        "--dilation_factor",
        default=5,
        type=int,
        help="the factor to dilate the binary mask",
    )
    add("--save_anno", action="store_true", help="save anonymized images")

    return cli
def _resolve_image_output_path(output, image_path, num_inputs):
    """Return the output file name (extension replaced later) for one image.

    * ``output`` is an existing directory -> ``output/<basename(image_path)>``
    * ``output`` is an existing file      -> ``output`` itself (single input only)
    * otherwise ``output`` is created as a directory and used as above.
    """
    if osp.isdir(output):
        return osp.join(output, osp.basename(image_path))
    if osp.isfile(output):
        assert num_inputs == 1, "Please specify a *directory* with args.output"
        return output
    os.makedirs(output, exist_ok=True)
    return osp.join(output, osp.basename(image_path))


def _mask_dynamic_objects(img, instances):
    """Build the dynamic-object mask and the whited-out image for one frame.

    Args:
        img: the input image array (assumed H x W x 3, as read with
            ``format="BGR"``); it is not modified.
        instances: the ``"instances"`` prediction entry; its ``pred_classes``
            and ``pred_masks`` fields are read.

    Returns:
        (mask, mask_img): ``mask`` is a uint8 H x W array that is 255 wherever
        an instance of a class in ``DYNAMIC_OBJECT_CATEGORIES`` was predicted
        and 0 elsewhere; ``mask_img`` is a uint8 copy of ``img`` with those
        same pixels set to 255 in every channel.
    """
    class_ids = instances.get("pred_classes").cpu().tolist()
    instance_masks = instances.get("pred_masks").cpu()

    # Explicit C-ordered copy: never write into the caller's image.
    mask_img = np.array(img, dtype=np.uint8, order="C")
    mask = np.zeros(img.shape[:2], dtype=np.uint8)

    for class_id, instance_mask in zip(class_ids, instance_masks):
        if class_id not in DYNAMIC_OBJECT_CATEGORIES:
            continue
        selected = instance_mask.numpy().astype(bool)
        mask[selected] = 255
        # A 2-D boolean index on an H x W x C array selects whole pixels,
        # so all channels are overwritten at once.
        mask_img[selected] = 255
    return mask, mask_img


def _run_on_images(args, demo):
    """Process ``args.input`` images: write masks to disk or preview them."""
    print(f"dynamic frames input paths: {args.input}")
    input_paths = list(args.input)
    # A single argument is treated as a glob pattern; several arguments are
    # taken as an explicit list of image paths (as the --input help states).
    if len(input_paths) == 1:
        input_paths = glob.glob(osp.expanduser(input_paths[0]))
        assert input_paths, "The input path(s) was not found"

    for path in tqdm.tqdm(input_paths, disable=not args.output):
        # use PIL, to be consistent with evaluation
        img = read_image(path, format="BGR")
        start_time = time.time()
        predictions, visualized_output = demo.run_on_image(img)
        if "instances" in predictions:
            summary = "detected {} instances".format(len(predictions["instances"]))
        else:
            summary = "finished"
        print("{}: {} in {:.2f}s".format(path, summary, time.time() - start_time))

        if not args.output:
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
            if cv2.waitKey(0) == 27:
                break  # esc to quit
            continue

        out_filename = _resolve_image_output_path(
            args.output, path, len(input_paths)
        )
        out_filename_prefix = osp.splitext(out_filename)[0]

        # only mask out the dynamic object categories
        mask, mask_img = _mask_dynamic_objects(img, predictions["instances"])

        # save masked image
        if args.save_anno:
            cv2.imwrite(out_filename_prefix + "_anon.png", mask_img)

        # save binary mask (invert to match the previous pipeline)
        kernel = np.ones(
            (args.dilation_factor, args.dilation_factor), dtype=np.uint8
        )
        mask = cv2.dilate(mask, kernel=kernel, iterations=1)
        cv2.imwrite(out_filename_prefix + ".png", cv2.bitwise_not(mask))

    if not args.output:
        # Close the preview window(s), as the webcam and video modes do.
        cv2.destroyAllWindows()


def _run_on_webcam(args, demo):
    """Preview detections from webcam device 0 until ESC or end of stream."""
    assert args.input is None, "Cannot have both --input and --webcam!"
    assert args.output is None, "Output not yet supported with --webcam!"
    cam = cv2.VideoCapture(0)
    try:
        for vis in tqdm.tqdm(demo.run_on_video(cam)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
    finally:
        # Release the device even if prediction or display raises.
        cam.release()
        cv2.destroyAllWindows()


def _run_on_video(args, demo):
    """Run on ``args.video_input``; write an output video or preview frames."""
    assert args.input is None, "Cannot have both --input and --video_input!"
    # Validate the input before any capture or writer is created.
    assert osp.isfile(args.video_input), args.video_input

    basename = osp.basename(args.video_input)
    video = cv2.VideoCapture(args.video_input)
    output_file = None
    try:
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        if args.output:
            if args.output.endswith((".mkv", ".mp4")):
                output_fname = args.output
            else:
                os.makedirs(args.output, exist_ok=True)
                output_fname = osp.join(args.output, basename)
                output_fname = osp.splitext(output_fname)[0] + ".mkv"
            # Refuse to overwrite an existing output video.
            assert not osp.isfile(output_fname), output_fname
            output_file = cv2.VideoWriter(
                filename=output_fname,
                # some installation of opencv may not support x264 (due to its
                # license), you can try other format (e.g. MPEG)
                fourcc=cv2.VideoWriter_fourcc(*"x264"),
                fps=float(frames_per_second),
                frameSize=(width, height),
                isColor=True,
            )

        for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
            if output_file is not None:
                output_file.write(vis_frame)
            else:
                cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                cv2.imshow(basename, vis_frame)
                if cv2.waitKey(1) == 27:
                    break  # esc to quit
    finally:
        # Release capture/writer (or preview windows) on every exit path.
        video.release()
        if output_file is not None:
            output_file.release()
        if not args.output:
            cv2.destroyAllWindows()


def dynamic_mask_generation(args):
    """Run Mask R-CNN on the selected input and emit dynamic-object masks.

    The config is built by ``setup_cfg(args)``; ``MODEL.WEIGHTS`` is then
    always set to ``DEFAULT_MASK_RCNN_MODEL_PATH`` (overriding any value given
    through ``--opts``) before the config is frozen.

    Exactly one mode runs, chosen in this order:

    * ``args.input``: for each image, either show the visualization in an
      OpenCV window (no ``args.output``) or write ``<name>.png``, the dilated
      and inverted binary mask of dynamic objects, plus ``<name>_anon.png``
      (the image with dynamic objects whited out) when ``args.save_anno``.
    * ``args.webcam``: live preview from camera 0.
    * ``args.video_input``: write a visualization video to ``args.output`` or
      preview the frames.

    Args:
        args: parsed CLI namespace as produced by ``get_parser()``.

    Raises:
        AssertionError: on conflicting options, a missing input, or an
            already existing output video.
    """
    cfg = setup_cfg(args)
    cfg.merge_from_list(["MODEL.WEIGHTS", DEFAULT_MASK_RCNN_MODEL_PATH])
    cfg.freeze()
    demo = VisualizationDemo(cfg)

    if args.input:
        _run_on_images(args, demo)
    elif args.webcam:
        _run_on_webcam(args, demo)
    elif args.video_input:
        _run_on_video(args, demo)
if __name__ == "__main__":
    # Script entry point: force the "spawn" start method for multiprocessing,
    # parse the command line, echo the parsed arguments, then run the pipeline.
    mp.set_start_method("spawn", force=True)
    cli_args = get_parser().parse_args()
    print(f"Arguments: {cli_args}")
    dynamic_mask_generation(cli_args)