From 4d704ce0222dabe53f664c52c6450a20aab46359 Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Tue, 20 Jun 2023 17:38:00 +0300 Subject: [PATCH 1/5] this definitely works better --- backend/exact_solution.py | 3 + backend/logs/file.log | 87 +++++++++++++++++ backend/server.py | 167 +++++++++++++------------------- frontend_python/make_request.py | 6 +- 4 files changed, 159 insertions(+), 104 deletions(-) diff --git a/backend/exact_solution.py b/backend/exact_solution.py index a38a7ad..64e1439 100644 --- a/backend/exact_solution.py +++ b/backend/exact_solution.py @@ -6,6 +6,8 @@ """ import torch import numpy as np +import torchvision.utils + from server_utils import flatten_feature_map, l2_norm @@ -57,6 +59,7 @@ def infer(self, query_features): query_features = l2_norm(query_features)[0] predictions = self.forward(query_features.float()) predictions = predictions.reshape(b, h, w).squeeze(0) + torchvision.utils.save_image(predictions.cpu().float(), "./intermediate_mask.png") return predictions def forward(self, embedding): diff --git a/backend/logs/file.log b/backend/logs/file.log index 763379a..01c6a6a 100644 --- a/backend/logs/file.log +++ b/backend/logs/file.log @@ -187,3 +187,90 @@ repeat_interleave() received an invalid combination of arguments - got (NoneType -file server.py --line 123 'image_path' +2023-06-16 09:31:17,785 - server_utils - ERROR - +-file server.py +--line 205 +OpenCV(4.7.0) :-1: error: (-5:Bad argument) in function 'imwrite' +> Overload resolution failed: +> - imwrite() missing required argument 'img' (pos 2) +> - imwrite() missing required argument 'img' (pos 2) + +2023-06-18 13:08:29,769 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-18 13:09:07,264 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. 
+2023-06-18 13:30:31,506 - server_utils - ERROR - +-file server.py +--line 164 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 13:31:23,511 - server_utils - ERROR - +-file server.py +--line 164 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 13:38:25,410 - server_utils - ERROR - +-file server.py +--line 164 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 16:14:17,440 - server_utils - ERROR - +-file server.py +--line 86 +shape +2023-06-18 16:30:00,838 - server_utils - ERROR - +-file server.py +--line 87 +shape +2023-06-18 16:30:02,893 - server_utils - ERROR - +-file server.py +--line 176 +not enough values to unpack (expected 4, got 3) +2023-06-18 16:45:12,270 - server_utils - ERROR - +-file server.py +--line 180 +not enough values to unpack (expected 4, got 3) +2023-06-18 18:07:02,734 - server_utils - ERROR - +-file server.py +--line 180 +not enough values to unpack (expected 4, got 3) +2023-06-18 18:10:00,569 - server_utils - ERROR - +-file server.py +--line 180 +Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same +2023-06-18 18:21:25,814 - server_utils - ERROR - +-file server.py +--line 213 +tuple index out of range +2023-06-18 18:22:54,478 - server_utils - ERROR - +-file server.py +--line 213 +tuple index out of range +2023-06-18 18:25:47,706 - server_utils - ERROR - +-file server.py +--line 219 +mat1 and mat2 shapes cannot be multiplied (6144x16 and 384x3) +2023-06-18 18:31:18,395 - server_utils - ERROR - +-file server.py +--line 209 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 18:33:31,478 - server_utils - ERROR - +-file server.py +--line 209 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 18:38:16,040 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-18 18:53:49,032 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-19 00:54:46,039 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-19 00:58:11,029 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-19 08:58:31,963 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. 
+2023-06-19 09:44:46,373 - server_utils - ERROR - +-file server.py +--line 245 +keywords must be strings +2023-06-20 15:42:13,948 - server_utils - ERROR - +-file server.py +--line 206 +int() argument must be a string, a bytes-like object or a real number, not 'dict' +2023-06-20 15:51:50,701 - server_utils - ERROR - +-file server.py +--line 173 +result type Float can't be cast to the desired output type Long +2023-06-20 17:24:52,125 - server_utils - ERROR - +-file server.py +--line 192 +only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices diff --git a/backend/server.py b/backend/server.py index 1abe61d..7b6f880 100644 --- a/backend/server.py +++ b/backend/server.py @@ -8,14 +8,12 @@ import json import time import queue -# import threading import server_utils as utils import annotations from exact_solution import ExactSolution -# from forwarder import Forwarder -from segment_anything import sam_model_registry, SamPredictor +from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator model_to_checkpoint_map = { @@ -130,73 +128,83 @@ def generate(gen_type): try: if gen_type in ["point", "annotation", "all"]: support_package = request.json['package'] + image_data = request.json['image'] image_path = request.json['image_path'] - for label_int, label_str in enumerate(support_package): - image = utils.get_image(image_data) - q_image_shape = image.shape - t0 = time.time() - predictor.set_image(image) - features = predictor.features - t1 = time.time() - print(f"SAM INFERENCE TIME: {t1-t0}") - linear_model_labels_int = [] - embedding_collection = [] + + all_labels = {elm: i for i, elm in enumerate(support_package.keys(), start=1)} + all_labels["background"] = 0 + linear_model_labels_int = [] + embedding_collection = [] + + image = utils.get_image(image_data) + predictor.set_image(image) + features = predictor.features + + generated_masks = mask_generator.generate(image) + + for label_int, label_str in enumerate(support_package, start=1): + for i, embedding in enumerate(support_package[label_str]["positive"][0]): embedding = np.array(embedding) embedding = torch.from_numpy(embedding).to(cfg.device)[0] - linear_model_labels_int.append(1) + linear_model_labels_int.append(all_labels[label_str]) embedding_collection.append(embedding) for i, embedding in enumerate(support_package[label_str]["negative"][0]): embedding = np.array(embedding) embedding = torch.from_numpy(embedding).to(cfg.device)[0] - linear_model_labels_int.append(0) + linear_model_labels_int.append(all_labels["background"]) embedding_collection.append(embedding) embedding_collection = torch.stack(embedding_collection, dim=0) linear_model_labels_int = np.array(linear_model_labels_int) - t0 = time.time() - linear_model = ExactSolution( - device=cfg.device, - embedding_collection=embedding_collection, - labels_int=linear_model_labels_int, - threshold=cfg.threshold - ) - predictions = linear_model.infer(features) - t1 = time.time() - print(f"EXACT SOLUTION INFERENCE TIME: {t1 - t0}") + t0 = time.time() + linear_model = ExactSolution( + device=cfg.device, + embedding_collection=embedding_collection, + labels_int=linear_model_labels_int, + threshold=cfg.threshold + ) + + predictions = linear_model.infer(features) + t1 = time.time() + print(f"EXACT SOLUTION INFERENCE TIME: {t1 - t0}") - yx_multi = (predictions == 1.).nonzero() # this can be changed later + matching_bboxes = [] + for label_int, label_str in enumerate(support_package, start=1): + yx_multi = 
(predictions == label_int).nonzero() for yx in yx_multi: - t0 = time.time() xy = utils.adapt_point( {"x": yx[1].item(), "y": yx[0].item()}, initial_shape=features.shape[-2:], final_shape=image.shape[0:2] ) - t1 = time.time() - print(f"ADAPT POINT TIME: {t1 - t0}") matching_points[label_str] = {"x": xy["x"], "y": xy["y"]} if gen_type == "point": return jsonify({'matching_points': matching_points, "error": error_text}) - l_ = np.ones((1,)) - t0 = time.time() - mask_, scores, logits = predictor.predict( - point_coords=np.array([[xy["x"], xy["y"]]]).astype(int), - point_labels=l_, - multimask_output=True, - ) - t1 = time.time() - print(f"MASK GEN INFERENCE TIME: {t1 - t0}") - mask_ = mask_.astype(np.uint8) - mask_ = cv2.resize(mask_[0], (q_image_shape[1], q_image_shape[0])) + matching_bboxes_ = [ + { + "id": cnt, + "bbox": [elm["bbox"][0], elm["bbox"][1], elm["bbox"][0] + elm["bbox"][2], elm["bbox"][1] + elm["bbox"][3]] + } + for cnt, elm in enumerate(generated_masks) if elm["segmentation"][int(xy["y"]), int(xy["x"])]] + + matching_bboxes = matching_bboxes + matching_bboxes_ + + unique_match_ids = np.unique([elm["id"] for elm in matching_bboxes]) + matching_bboxes = {elm["id"]: elm["bbox"] for elm in matching_bboxes if elm["id"] in unique_match_ids} + + for label_int, label_str in enumerate(support_package, start=1): + for match_id in matching_bboxes: + mask_ = generated_masks[int(match_id)]["segmentation"] * 1 + mask_ = mask_.astype(np.int8) polygons, points_ = annotations.generate_polygons_from_mask( polygons=polygons, @@ -207,27 +215,14 @@ def generate(gen_type): masks.append(mask_) - t0 = time.time() - # create xml file from the coordinates - if len(points_)>0: - for cnt_p, pts_ in enumerate(points_): - # coordinates = np.nonzero(mask_) - if len(pts_) >= 3: - print(f"---> Finding bounding box: {cnt_p}/{len(points_)}") - pts = np.array([np.array(pt) for pt in pts_]) - y0, y1, x0, x1 = pts[:, 0].min(), pts[:, 0].max(), pts[:, 1].min(), pts[:, 1].max() - bboxes.append({ - "coordinates": [int(x0), int(y0), int(x1), int(y1)], - "format": "xyxy", - "label": "label_str" - }) - - labels_str.append(label_str) - labels_int.append(label_int) - - t1 = time.time() - print(f"BBOX GENERATION TIME: {t1 - t0}") + bboxes.append({ + "coordinates": matching_bboxes[match_id], + "format": "xyxy", + "label": label_str + }) + labels_str.append(label_str) + labels_int.append(label_int) if len(masks) > 0: print("---> Merging masks") @@ -279,47 +274,6 @@ def generate(gen_type): return jsonify({"error": error_text}) -# @app.route('/forwarder/extract_features', methods=['POST']) -# def forwarder_extract(): -# -# timestamp = time.time() -# if not ExtractInQueue.full(): -# ExtractInQueue.put({ -# "url": f"http://{base}/extract_features", -# "data": request.json, -# "timestamp": timestamp -# }) -# else: -# return json.dumps({"warning": "Queue is full, please wait and try again."}) -# -# while True: -# if timestamp in ExtractOutDict: -# response = ExtractOutDict[timestamp]["data"] -# del ExtractOutDict[timestamp] -# break -# -# return response -# -# @app.route('/forwarder/generate/', methods=['POST']) -# def forwarder_generate(gen_type): -# -# timestamp = time.time() -# if not GenerateInQueue.full(): -# GenerateInQueue.put({ -# "url": f"http://{base}/generate/{gen_type}", -# "data": request.json, -# "timestamp": timestamp -# }) -# else: -# return json.dumps({"warning": "Queue is full, please wait and try again."}) -# -# while True: -# if timestamp in GenerateOutDict: -# response = GenerateOutDict[timestamp]["data"] -# del 
GenerateOutDict[timestamp] -# break -# -# return response def run_forwarder(forwarder): forwarder.run() @@ -334,6 +288,16 @@ def run_forwarder(forwarder): sam = sam_model_registry[cfg.model](checkpoint=checkpoint) sam.to(device=cfg.device) predictor = SamPredictor(sam) + + mask_generator = SamAutomaticMaskGenerator( + model=sam, + points_per_side=64, + pred_iou_thresh=0.9, + stability_score_thresh=0.92, + crop_n_layers=1, + crop_n_points_downscale_factor=2, + min_mask_region_area=100, # Requires open-cv to run post-processing + ) if not os.path.isdir("./backend/logs/"): os.makedirs("./backend/logs/") @@ -365,4 +329,5 @@ def run_forwarder(forwarder): # fp.start() # app.run(host = "0.0.0.0", port=8080, debug=False) + print("SERVER STARTING...") serve(app, host="0.0.0.0", port=8080) diff --git a/frontend_python/make_request.py b/frontend_python/make_request.py index 801de9c..46df98a 100644 --- a/frontend_python/make_request.py +++ b/frontend_python/make_request.py @@ -26,7 +26,7 @@ t1 = time.time() print(response.text) data_json = json.loads(response.text) -image_path = "../query_images/test.jpg" +image_path = "../query_images/bmw_i20_s_1.jpg" data_json["image_path"] = image_path init_image = utils.import_image(image_path) data_json["image"] = utils.numpy_to_base64(init_image) @@ -46,7 +46,7 @@ t2 = time.time() data_json = json.loads(response.text) masks = utils.get_image(data_json["masks"]) -masks.save("masks.png") +masks.save("../results/masks.png") # Print the response try: @@ -55,7 +55,7 @@ print(data_json) # save annotation -with open("test.json", "w") as fp: +with open("../results/bmw_i20_s_0.json", "w") as fp: json.dump(data_json["coco_json"], fp, indent=4) print(f"Request.1 Time: {t1-t0} | Request.2 Time: {t2-t1}") From dd80edf76aca984f76956c8f95225d5583fb50ce Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Sun, 23 Jul 2023 22:46:59 +0300 Subject: [PATCH 2/5] this definitely works better --- .gitignore | 2 +- backend/exact_solution.py | 46 ++++++++++---- backend/logs/file.log | 109 ++++++++++++++++++++++++++++++++ backend/server.py | 10 +-- config.yml | 3 +- frontend_python/make_request.py | 10 +-- 6 files changed, 159 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index 068aadd..81ea9c5 100644 --- a/.gitignore +++ b/.gitignore @@ -16,5 +16,5 @@ *.json *.onnx *.drawio -/dev_gitignored/ +/dev/ /frontend_python/ diff --git a/backend/exact_solution.py b/backend/exact_solution.py index 64e1439..66dd023 100644 --- a/backend/exact_solution.py +++ b/backend/exact_solution.py @@ -7,7 +7,7 @@ import torch import numpy as np import torchvision.utils - +import tqdm from server_utils import flatten_feature_map, l2_norm @@ -35,42 +35,66 @@ def __init__(self, super(ExactSolution, self).__init__() self.device = device - self.embedding_collection = embedding_collection + # self.loss_func = torch.nn.CrossEntropyLoss() + + self.embedding_collection = l2_norm(embedding_collection).to(self.device).float() self.threshold = threshold self.num_classes = len(np.unique(labels_int)) + # self.labels_int = torch.from_numpy(labels_int).to(self.device).long() self.labels_bin = binarize_labels(labels_int) self.linear = torch.nn.Linear(in_features=self.embedding_collection.shape[1], out_features=self.labels_bin.shape[1], bias=False) self.solve_exact() + # self.opt = torch.optim.SGD(momentum=0.9, lr=0.1, params=self.linear.parameters()) + # self.train() + # self.train_linear() self.eval() def solve_exact(self): - collection_inverse = 
torch.pinverse(l2_norm(self.embedding_collection)).float() + collection_inverse = torch.pinverse(self.embedding_collection) self.W = torch.matmul(collection_inverse.to(self.device), self.labels_bin.to(self.device)) with torch.no_grad(): self.linear.weight = torch.nn.Parameter(self.W.T) def infer(self, query_features): + with torch.no_grad(): + b, n, h, w = query_features.shape query_features = flatten_feature_map(query_features) query_features = l2_norm(query_features)[0] - predictions = self.forward(query_features.float()) + out = self.forward(query_features.float()) + + torchvision.utils.save_image(out[:, 1].reshape(b, h, w).squeeze(0).cpu().float(), "./intermediate_preds.png") + + # apply adaptive threshold + self.threshold = self.threshold if self.threshold <= out[:, 1].max() else out[:, 1].max() + print(f"ADAPTIVE THRESHOLD: {self.threshold}") + out = torch.where(out >= self.threshold, 1, 0) + + # get indexes of maximums + predictions = out.argmax(dim=-1) + predictions = predictions.reshape(b, h, w).squeeze(0) torchvision.utils.save_image(predictions.cpu().float(), "./intermediate_mask.png") + return predictions + def train_linear(self): + pbar = tqdm.tqdm(range(0, 200)) + for epoch in pbar: + out = self.linear(self.embedding_collection) + loss = self.loss_func(out, self.labels_int) + pbar.set_description(f"EPOCH: {epoch} | LOSS: {loss.item()}") + + self.opt.zero_grad() + loss.backward() + self.opt.step() + def forward(self, embedding): out = self.linear(embedding) out = torch.where(out > 1, 2-out, out) out = torch.nn.functional.softmax(out, dim=-1) - - # apply adaptive threshold - self.threshold = out[:, 1].max()-0.02 if self.threshold > out[:, 1].max() else self.threshold - out = torch.where(out >= self.threshold, 1, 0) - - # get indexes of maximums - out = out.argmax(dim=-1) return out diff --git a/backend/logs/file.log b/backend/logs/file.log index 01c6a6a..7521fd4 100644 --- a/backend/logs/file.log +++ b/backend/logs/file.log @@ -274,3 +274,112 @@ result type Float can't be cast to the desired output type Long -file server.py --line 192 only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices +2023-06-21 09:53:35,865 - server_utils - ERROR - +-file server.py +--line 166 +SGD.__init__() missing 1 required positional argument: 'params' +2023-06-21 09:54:29,877 - server_utils - ERROR - +-file server.py +--line 166 +SGD.__init__() missing 1 required positional argument: 'params' +2023-06-21 09:55:19,811 - server_utils - ERROR - +-file server.py +--line 166 +SGD.__init__() missing 1 required positional argument: 'params' +2023-06-21 09:57:55,476 - server_utils - ERROR - +-file server.py +--line 166 +SGD.__init__() missing 1 required positional argument: 'params' +2023-06-21 10:00:03,784 - server_utils - ERROR - +-file server.py +--line 166 +'ExactSolution' object has no attribute 'linear' +2023-06-21 10:01:00,648 - server_utils - ERROR - +-file server.py +--line 166 +'bool' object is not callable +2023-06-21 10:14:29,460 - server_utils - ERROR - +-file server.py +--line 166 +'bool' object is not callable +2023-06-21 10:24:17,893 - server_utils - ERROR - +-file server.py +--line 166 +expected scalar type Double but found Float +2023-06-21 10:25:31,981 - server_utils - ERROR - +-file server.py +--line 166 +"nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Int' +2023-06-21 10:28:20,393 - server_utils - ERROR - +-file server.py +--line 166 +to() received an invalid combination of arguments - got 
(torch.tensortype), but expected one of: + * (torch.device device, torch.dtype dtype, bool non_blocking, bool copy, *, torch.memory_format memory_format) + * (torch.dtype dtype, bool non_blocking, bool copy, *, torch.memory_format memory_format) + * (Tensor tensor, bool non_blocking, bool copy, *, torch.memory_format memory_format) + +2023-07-09 15:31:33,316 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-09 16:53:42,852 - server_utils - ERROR - +-file server.py +--line 173 +Dimension out of range (expected to be in range of [-1, 0], but got 2) +2023-07-09 17:06:05,739 - server_utils - ERROR - +-file server.py +--line 86 +'tuple' object has no attribute 'shape' +2023-07-09 17:07:25,830 - server_utils - ERROR - +-file server.py +--line 144 +repeat_interleave() received an invalid combination of arguments - got (tuple, int, dim=int), but expected one of: + * (Tensor input, Tensor repeats, int dim, *, int output_size) + * (Tensor repeats, *, int output_size) + didn't match because some of the keywords were incorrect: dim + * (Tensor input, int repeats, int dim, *, int output_size) + +2023-07-09 17:10:28,599 - server_utils - ERROR - +-file server.py +--line 144 +repeat_interleave() received an invalid combination of arguments - got (tuple, int, dim=int), but expected one of: + * (Tensor input, Tensor repeats, int dim, *, int output_size) + * (Tensor repeats, *, int output_size) + didn't match because some of the keywords were incorrect: dim + * (Tensor input, int repeats, int dim, *, int output_size) + +2023-07-09 17:12:17,006 - server_utils - ERROR - +-file server.py +--line 144 +repeat_interleave() received an invalid combination of arguments - got (tuple, int, dim=int), but expected one of: + * (Tensor input, Tensor repeats, int dim, *, int output_size) + * (Tensor repeats, *, int output_size) + didn't match because some of the keywords were incorrect: dim + * (Tensor input, int repeats, int dim, *, int output_size) + +2023-07-09 17:13:58,124 - server_utils - ERROR - +-file server.py +--line 144 +repeat_interleave() received an invalid combination of arguments - got (tuple, int, dim=int), but expected one of: + * (Tensor input, Tensor repeats, int dim, *, int output_size) + * (Tensor repeats, *, int output_size) + didn't match because some of the keywords were incorrect: dim + * (Tensor input, int repeats, int dim, *, int output_size) + +2023-07-09 17:15:06,438 - server_utils - ERROR - +-file server.py +--line 173 +shape '[1, 64, 64]' is invalid for input of size 8192 +2023-07-09 17:44:45,055 - server_utils - ERROR - +-file server.py +--line 78 +not enough values to unpack (expected 2, got 1) +2023-07-09 17:44:47,678 - server_utils - ERROR - +-file server.py +--line 142 +not enough values to unpack (expected 2, got 1) +2023-07-09 17:47:50,766 - server_utils - ERROR - +-file server.py +--line 78 +not enough values to unpack (expected 2, got 1) +2023-07-09 17:47:53,413 - server_utils - ERROR - +-file server.py +--line 142 +not enough values to unpack (expected 2, got 1) diff --git a/backend/server.py b/backend/server.py index 7b6f880..2b77238 100644 --- a/backend/server.py +++ b/backend/server.py @@ -4,7 +4,7 @@ import numpy as np import os import sys -import cv2 +import traceback import json import time import queue @@ -75,7 +75,7 @@ def extract(): image = utils.get_image(data["images"][image_id]) with torch.no_grad(): predictor.set_image(image) - features = predictor.features + _, features = predictor.features positive_coord = 
coordinates["positive"] negative_coord = coordinates["negative"] @@ -139,7 +139,7 @@ def generate(gen_type): image = utils.get_image(image_data) predictor.set_image(image) - features = predictor.features + _, features = predictor.features generated_masks = mask_generator.generate(image) @@ -270,6 +270,7 @@ def generate(gen_type): exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] error_text = f"{exc_type}\n-file {fname}\n--line {exc_tb.tb_lineno}\n{e}" + print(traceback.format_exc()) logger.error(error_text) return jsonify({"error": error_text}) @@ -287,10 +288,11 @@ def run_forwarder(forwarder): sam = sam_model_registry[cfg.model](checkpoint=checkpoint) sam.to(device=cfg.device) - predictor = SamPredictor(sam) + predictor = SamPredictor(sam, preconv_features=True) mask_generator = SamAutomaticMaskGenerator( model=sam, + preconv_features=True, points_per_side=64, pred_iou_thresh=0.9, stability_score_thresh=0.92, diff --git a/config.yml b/config.yml index dbdcb2d..25dfba8 100644 --- a/config.yml +++ b/config.yml @@ -11,8 +11,9 @@ labeling: polygon_resolution: 0.3 window_size: [1024, 1024] +preconv_features: False model: vit_h # Faster <<< vit_b, vit_l, vit_h >>> More accurate -threshold: 0.9 +threshold: 0.75 device: "cuda" COLORMAP: [[255, 0, 121],[101, 0, 255],[253, 255, 0],[0, 49, 255],[0, 186, 255],[0, 255, 39],[215, 0, 255],[0, 24, 255],[0, 157, 255],[0, 131, 255]] diff --git a/frontend_python/make_request.py b/frontend_python/make_request.py index 46df98a..a8dd0ab 100644 --- a/frontend_python/make_request.py +++ b/frontend_python/make_request.py @@ -26,7 +26,7 @@ t1 = time.time() print(response.text) data_json = json.loads(response.text) -image_path = "../query_images/bmw_i20_s_1.jpg" +image_path = "../query_images/bmw_emblem.jpg" data_json["image_path"] = image_path init_image = utils.import_image(image_path) data_json["image"] = utils.numpy_to_base64(init_image) @@ -45,8 +45,10 @@ response = requests.post(url, data=json.dumps(data_json), headers=headers) t2 = time.time() data_json = json.loads(response.text) -masks = utils.get_image(data_json["masks"]) -masks.save("../results/masks.png") + +if "masks" in data_json: + masks = utils.get_image(data_json["masks"]) + masks.save("../results/masks.png") # Print the response try: @@ -55,7 +57,7 @@ print(data_json) # save annotation -with open("../results/bmw_i20_s_0.json", "w") as fp: +with open("../results/bmw_emblem.json", "w") as fp: json.dump(data_json["coco_json"], fp, indent=4) print(f"Request.1 Time: {t1-t0} | Request.2 Time: {t2-t1}") From 2b29ef856b86d4d6879a6a949911706151fbffd3 Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Sun, 23 Jul 2023 23:39:57 +0300 Subject: [PATCH 3/5] faster inference --- backend/annotations.py | 8 ++-- backend/logs/file.log | 21 +++++++++ backend/server.py | 76 +++++++++------------------------ frontend_python/make_request.py | 4 +- 4 files changed, 48 insertions(+), 61 deletions(-) diff --git a/backend/annotations.py b/backend/annotations.py index c556e3d..45cbf7b 100644 --- a/backend/annotations.py +++ b/backend/annotations.py @@ -107,16 +107,16 @@ def generate_polygons_from_mask(polygons, mask, label, polygon_resolution): # Generate polygons from the contours points_ = [] - instances, num_instances = find_instances(mask) for k in range(1, num_instances+1, 1): instance = ((instances == k)*1).astype(np.uint8) # Find the contours in the binary mask contours, _ = cv2.findContours(instance, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + 
filtered_contours = [contour for contour in contours if cv2.contourArea(contour) >= 100] - for i, contour in enumerate(contours): - if int(len(contour)*polygon_resolution)>0: + for i, contour in enumerate(filtered_contours): + if int(len(contour)*polygon_resolution) > 2: points = contour.squeeze()[np.arange(0, len(contour), int(len(contour)/int(len(contour)*polygon_resolution)) @@ -128,7 +128,7 @@ def generate_polygons_from_mask(polygons, mask, label, polygon_resolution): "shape_type": "polygon", "flags": {} }) - points_.append(points) + points_.append(np.array(points)) return polygons, points_ diff --git a/backend/logs/file.log b/backend/logs/file.log index 7521fd4..6549dfc 100644 --- a/backend/logs/file.log +++ b/backend/logs/file.log @@ -383,3 +383,24 @@ not enough values to unpack (expected 2, got 1) -file server.py --line 142 not enough values to unpack (expected 2, got 1) +2023-07-23 23:06:58,600 - server_utils - ERROR - +-file server.py +--line 211 +setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part. +2023-07-23 23:26:27,032 - server_utils - ERROR - +-file server.py +--line 205 +too many values to unpack (expected 2) +2023-07-23 23:27:00,571 - server_utils - ERROR - +-file server.py +--line 215 +too many indices for array: array is 1-dimensional, but 2 were indexed +2023-07-23 23:28:47,726 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-23 23:30:29,986 - server_utils - ERROR - +-file server.py +--line 219 +name 'generated_masks' is not defined +2023-07-23 23:30:57,066 - server_utils - ERROR - +-file server.py +--line 256 +Object of type int32 is not JSON serializable diff --git a/backend/server.py b/backend/server.py index 2b77238..78c4d9b 100644 --- a/backend/server.py +++ b/backend/server.py @@ -9,6 +9,7 @@ import time import queue +import cv2 import server_utils as utils import annotations from exact_solution import ExactSolution @@ -138,11 +139,10 @@ def generate(gen_type): embedding_collection = [] image = utils.get_image(image_data) + query_image_shape = image.shape predictor.set_image(image) _, features = predictor.features - generated_masks = mask_generator.generate(image) - for label_int, label_str in enumerate(support_package, start=1): for i, embedding in enumerate(support_package[label_str]["positive"][0]): @@ -189,22 +189,17 @@ def generate(gen_type): if gen_type == "point": return jsonify({'matching_points': matching_points, "error": error_text}) - matching_bboxes_ = [ - { - "id": cnt, - "bbox": [elm["bbox"][0], elm["bbox"][1], elm["bbox"][0] + elm["bbox"][2], elm["bbox"][1] + elm["bbox"][3]] - } - for cnt, elm in enumerate(generated_masks) if elm["segmentation"][int(xy["y"]), int(xy["x"])]] - - matching_bboxes = matching_bboxes + matching_bboxes_ - - unique_match_ids = np.unique([elm["id"] for elm in matching_bboxes]) - matching_bboxes = {elm["id"]: elm["bbox"] for elm in matching_bboxes if elm["id"] in unique_match_ids} - - for label_int, label_str in enumerate(support_package, start=1): - for match_id in matching_bboxes: - mask_ = generated_masks[int(match_id)]["segmentation"] * 1 - mask_ = mask_.astype(np.int8) + l_ = np.ones((1,)) + t0 = time.time() + mask_, scores, logits = predictor.predict( + point_coords=np.array([[xy["x"], xy["y"]]]).astype(int), + point_labels=l_, + multimask_output=True, + ) + t1 = time.time() + print(f"MASK GEN INFERENCE TIME: {t1 - t0}") + mask_ = mask_.astype(np.uint8) + mask_ = 
cv2.resize(mask_[0], (query_image_shape[1], query_image_shape[0])) polygons, points_ = annotations.generate_polygons_from_mask( polygons=polygons, @@ -213,13 +208,15 @@ def generate(gen_type): polygon_resolution=cfg.labeling.polygon_resolution ) - masks.append(mask_) + for pt in points_: + + bboxes.append({ + "coordinates": [int(pt[:, 0].min()), int(pt[:, 1].min()), int(pt[:, 0].max()), int(pt[:, 1].max())], + "format": "xyxy", + "label": label_str + }) - bboxes.append({ - "coordinates": matching_bboxes[match_id], - "format": "xyxy", - "label": label_str - }) + masks.append(mask_) labels_str.append(label_str) labels_int.append(label_int) @@ -289,17 +286,6 @@ def run_forwarder(forwarder): sam = sam_model_registry[cfg.model](checkpoint=checkpoint) sam.to(device=cfg.device) predictor = SamPredictor(sam, preconv_features=True) - - mask_generator = SamAutomaticMaskGenerator( - model=sam, - preconv_features=True, - points_per_side=64, - pred_iou_thresh=0.9, - stability_score_thresh=0.92, - crop_n_layers=1, - crop_n_points_downscale_factor=2, - min_mask_region_area=100, # Requires open-cv to run post-processing - ) if not os.path.isdir("./backend/logs/"): os.makedirs("./backend/logs/") @@ -310,26 +296,6 @@ def run_forwarder(forwarder): logger = utils.get_logger(log_path='./backend/logs/file.log') - # forwarder_extract = Forwarder( - # in_queue=ExtractInQueue, - # out_dict=ExtractOutDict, - # freq=10 - # ) - # - # forwarder_generate = Forwarder( - # in_queue=GenerateInQueue, - # out_dict=GenerateOutDict, - # freq=10 - # ) - # - # fps = [] - # fps.append(threading.Thread(target=run_forwarder, args=(forwarder_extract,))) - # fps.append(threading.Thread(target=run_forwarder, args=(forwarder_generate,))) - # - # for fp in fps: - # fp.daemon = True - # fp.start() - # app.run(host = "0.0.0.0", port=8080, debug=False) print("SERVER STARTING...") serve(app, host="0.0.0.0", port=8080) diff --git a/frontend_python/make_request.py b/frontend_python/make_request.py index a8dd0ab..5aa91e6 100644 --- a/frontend_python/make_request.py +++ b/frontend_python/make_request.py @@ -26,7 +26,7 @@ t1 = time.time() print(response.text) data_json = json.loads(response.text) -image_path = "../query_images/bmw_emblem.jpg" +image_path = "../query_images/bmw_i20_s.jpg" data_json["image_path"] = image_path init_image = utils.import_image(image_path) data_json["image"] = utils.numpy_to_base64(init_image) @@ -57,7 +57,7 @@ print(data_json) # save annotation -with open("../results/bmw_emblem.json", "w") as fp: +with open("../results/bmw_i20_s.json", "w") as fp: json.dump(data_json["coco_json"], fp, indent=4) print(f"Request.1 Time: {t1-t0} | Request.2 Time: {t2-t1}") From 5722f023f7183ca6d5611488aaedc2bd23e76aad Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Mon, 24 Jul 2023 00:47:57 +0300 Subject: [PATCH 4/5] threshold is determined differently --- README.MD | 18 ++++++++++++++--- backend/exact_solution.py | 13 +++---------- backend/logs/file.log | 41 +++++++++++++++++++++++++++++++++++++++ backend/server.py | 14 ++++--------- 4 files changed, 63 insertions(+), 23 deletions(-) diff --git a/README.MD b/README.MD index 2bb4445..f89afd5 100644 --- a/README.MD +++ b/README.MD @@ -36,15 +36,27 @@ More accurate <<< [VIT-H](https://dl.fbaipublicfiles.com/segment_anything/sam_vi | [VIT-L](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth) | [VIT-B](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth) >>> Faster -## RUN +## START SERVER Before you start the application, create a folder to 
put your support images that will be used to learn from, then create a folder to put your query images that are going to be labeled. Put the relative path to the folders to support_dir and query_dir in config.yml. -Then, let the magic begin ... + +#### To create request json that will be sent to server +```commandline +python interface.py +``` +then adjust make_request.py according to your images and paths + +Finally, run the server ... ``` -python main.py +python backend/server.py +``` + +and make request while server.py is running +```commandline +python make_request.py ``` ## DOCKERIZATION diff --git a/backend/exact_solution.py b/backend/exact_solution.py index 66dd023..963ef99 100644 --- a/backend/exact_solution.py +++ b/backend/exact_solution.py @@ -67,18 +67,11 @@ def infer(self, query_features): query_features = l2_norm(query_features)[0] out = self.forward(query_features.float()) - torchvision.utils.save_image(out[:, 1].reshape(b, h, w).squeeze(0).cpu().float(), "./intermediate_preds.png") - - # apply adaptive threshold - self.threshold = self.threshold if self.threshold <= out[:, 1].max() else out[:, 1].max() - print(f"ADAPTIVE THRESHOLD: {self.threshold}") - out = torch.where(out >= self.threshold, 1, 0) - # get indexes of maximums - predictions = out.argmax(dim=-1) + predictions = out[:, 1] - predictions = predictions.reshape(b, h, w).squeeze(0) - torchvision.utils.save_image(predictions.cpu().float(), "./intermediate_mask.png") + predictions = predictions.reshape(h, w) + torchvision.utils.save_image(predictions.cpu().float(), "./intermediate_preds.png") return predictions diff --git a/backend/logs/file.log b/backend/logs/file.log index 6549dfc..15ad39a 100644 --- a/backend/logs/file.log +++ b/backend/logs/file.log @@ -404,3 +404,44 @@ name 'generated_masks' is not defined -file server.py --line 256 Object of type int32 is not JSON serializable +2023-07-23 23:48:38,345 - server_utils - ERROR - +-file server.py +--line 75 +not enough values to unpack (expected 2, got 1) +2023-07-23 23:48:40,966 - server_utils - ERROR - +-file server.py +--line 140 +not enough values to unpack (expected 2, got 1) +2023-07-23 23:49:26,722 - server_utils - ERROR - +-file server.py +--line 75 +not enough values to unpack (expected 2, got 1) +2023-07-23 23:49:29,337 - server_utils - ERROR - +-file server.py +--line 162 +'list' object has no attribute 'norm' +2023-07-23 23:50:14,078 - server_utils - ERROR - +-file server.py +--line 75 +not enough values to unpack (expected 2, got 1) +2023-07-23 23:59:24,235 - server_utils - ERROR - +-file server.py +--line 169 +can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first. +2023-07-24 00:08:36,355 - server_utils - ERROR - +-file server.py +--line 169 +Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [64, 64] +2023-07-24 00:09:47,127 - server_utils - ERROR - +-file server.py +--line 169 +Input type (torch.cuda.LongTensor) and weight type (torch.FloatTensor) should be the same +2023-07-24 00:10:29,927 - server_utils - ERROR - +-file server.py +--line 169 +Input type (torch.cuda.LongTensor) and weight type (torch.cuda.FloatTensor) should be the same +2023-07-24 00:13:24,336 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-24 00:14:05,916 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. 
+2023-07-24 00:15:43,198 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-24 00:19:44,234 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-24 00:21:20,447 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. diff --git a/backend/server.py b/backend/server.py index 78c4d9b..50a4925 100644 --- a/backend/server.py +++ b/backend/server.py @@ -23,10 +23,6 @@ "vit_h": "./checkpoints/sam_vit_h_4b8939.pth", } base = "localhost:8080" -ExtractInQueue = queue.Queue(maxsize=10) -ExtractOutDict = {} -GenerateInQueue = queue.Queue(maxsize=4) -GenerateOutDict = {} app = Flask(__name__) @app.route('/health', methods=['GET']) @@ -174,10 +170,12 @@ def generate(gen_type): t1 = time.time() print(f"EXACT SOLUTION INFERENCE TIME: {t1 - t0}") - matching_bboxes = [] for label_int, label_str in enumerate(support_package, start=1): - yx_multi = (predictions == label_int).nonzero() + threshold = predictions.flatten().sort(descending=True)[0][0:5] + print(f"HIGHEST 5 CONFIDENCES: {threshold}") + threshold = threshold[4] + yx_multi = (predictions >= threshold).nonzero() for yx in yx_multi: xy = utils.adapt_point( {"x": yx[1].item(), "y": yx[0].item()}, @@ -273,10 +271,6 @@ def generate(gen_type): return jsonify({"error": error_text}) -def run_forwarder(forwarder): - forwarder.run() - - if __name__ == '__main__': cfg = utils.load_config("./config.yml") From ba925c6d1a5b22985db89114010894f194290e32 Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Mon, 24 Jul 2023 00:49:13 +0300 Subject: [PATCH 5/5] readme --- README.MD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.MD b/README.MD index f89afd5..67021b0 100644 --- a/README.MD +++ b/README.MD @@ -20,14 +20,14 @@ pip install git+https://github.com/facebookresearch/segment-anything.git or clone the repository locally and install with ``` -git clone git@github.com:facebookresearch/segment-anything.git +git clone git@github.com:rootvisionai/segment-anything.git cd segment-anything; pip install -e . ``` The following dependencies are necessary for the FEWSAM: ``` -pip install opencv-python PyYAML PySimpleGUI kmeans-pytorch +pip install opencv-python PyYAML PySimpleGUI ``` Now download the model checkpoints:
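
The core idea the series keeps refining in `backend/exact_solution.py` is a linear head that is solved in closed form rather than trained: the weights are the Moore-Penrose pseudoinverse of the L2-normalized support embeddings multiplied by the binarized labels. Below is a minimal standalone sketch of that step, with a one-hot stand-in for the project's `binarize_labels` helper, which these patches never show:

```python
import torch

def binarize_labels(labels_int: torch.Tensor) -> torch.Tensor:
    # stand-in for the project's helper: integer labels -> one-hot float matrix
    return torch.nn.functional.one_hot(labels_int).float()

def solve_exact(embeddings: torch.Tensor, labels_int: torch.Tensor) -> torch.Tensor:
    # closed-form least-squares weights: W = pinv(X) @ Y,
    # so class scores for new embeddings are simply X_new @ W
    X = torch.nn.functional.normalize(embeddings, dim=-1)  # row-wise L2 norm
    Y = binarize_labels(labels_int)
    return torch.linalg.pinv(X) @ Y

torch.manual_seed(0)
X = torch.randn(8, 16)                      # 8 support embeddings, 16-dim
y = torch.tensor([0, 1, 0, 1, 0, 1, 0, 1])  # positive/negative click labels
W = solve_exact(X, y)
scores = torch.nn.functional.normalize(X, dim=-1) @ W
print(scores.argmax(dim=-1))  # exact fit here, since N <= D with independent rows
```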
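Patch 4 swaps the absolute softmax threshold for a rank-based one: a pixel survives if its confidence reaches the fifth-highest score in the map. The same selection in isolation, using `topk` in place of the patch's sort-then-index, to which it is equivalent:

```python
import torch

def top_k_candidate_points(confidence: torch.Tensor, k: int = 5) -> torch.Tensor:
    # keep (y, x) indices whose score reaches the k-th highest confidence;
    # rank-based, so at least k points survive (more on ties) at any score scale
    kth_score = confidence.flatten().topk(k).values[-1]
    return (confidence >= kth_score).nonzero()

confidence = torch.rand(64, 64)            # stand-in for the (h, w) prediction map
print(top_k_candidate_points(confidence))  # tensor of shape (>= 5, 2)
```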
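After patch 3, bounding boxes come from the simplified polygons rather than from mask-generator records. A condensed sketch of the `annotations.py`/`server.py` interplay, assuming a single-instance binary mask; the real code first splits the mask into instances with a `find_instances` helper that the diffs do not include:

```python
import cv2
import numpy as np

def mask_to_polygons_and_boxes(mask, resolution=0.3, min_area=100.0):
    # subsample each sufficiently large contour, then box it in xyxy format;
    # `resolution` is the fraction of contour points kept
    # (cfg.labeling.polygon_resolution in config.yml)
    contours, _ = cv2.findContours(mask.astype(np.uint8),
                                   cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    polygons, boxes = [], []
    for contour in (c for c in contours if cv2.contourArea(c) >= min_area):
        n_keep = int(len(contour) * resolution)
        if n_keep > 2:  # a polygon needs at least three vertices
            pts = contour.squeeze()[::max(1, len(contour) // n_keep)]  # (M, 2), (x, y)
            polygons.append(pts)
            boxes.append([int(pts[:, 0].min()), int(pts[:, 1].min()),
                          int(pts[:, 0].max()), int(pts[:, 1].max())])
    return polygons, boxes

mask = np.zeros((128, 128), dtype=np.uint8)
cv2.circle(mask, (64, 64), 30, 1, -1)       # one synthetic instance
print(mask_to_polygons_and_boxes(mask)[1])  # roughly [[34, 34, 94, 94]]
```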
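For reference, patch 1's mask-matching step (removed again in patch 3) kept every `SamAutomaticMaskGenerator` record whose segmentation covered a matched point and converted its XYWH `bbox` to xyxy. The selection logic on its own, with a fabricated record standing in for the generator's output:

```python
import numpy as np

def masks_covering_point(generated_masks, x, y):
    # generated_masks: list of dicts in SamAutomaticMaskGenerator's output
    # format, each with a boolean "segmentation" array and an XYWH "bbox"
    boxes = []
    for record in generated_masks:
        if record["segmentation"][int(y), int(x)]:
            bx, by, bw, bh = record["bbox"]
            boxes.append([bx, by, bx + bw, by + bh])  # XYWH -> xyxy
    return boxes

seg = np.zeros((32, 32), dtype=bool)
seg[8:16, 8:16] = True
fake = [{"segmentation": seg, "bbox": [8, 8, 8, 8]}]  # stand-in record
print(masks_covering_point(fake, x=10, y=10))         # -> [[8, 8, 16, 16]]
```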