From 4d704ce0222dabe53f664c52c6450a20aab46359 Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Tue, 20 Jun 2023 17:38:00 +0300 Subject: [PATCH 1/5] this definitely works better --- backend/exact_solution.py | 3 + backend/logs/file.log | 87 +++++++++++++++++ backend/server.py | 167 +++++++++++++------------------- frontend_python/make_request.py | 6 +- 4 files changed, 159 insertions(+), 104 deletions(-) diff --git a/backend/exact_solution.py b/backend/exact_solution.py index a38a7ad..64e1439 100644 --- a/backend/exact_solution.py +++ b/backend/exact_solution.py @@ -6,6 +6,8 @@ """ import torch import numpy as np +import torchvision.utils + from server_utils import flatten_feature_map, l2_norm @@ -57,6 +59,7 @@ def infer(self, query_features): query_features = l2_norm(query_features)[0] predictions = self.forward(query_features.float()) predictions = predictions.reshape(b, h, w).squeeze(0) + torchvision.utils.save_image(predictions.cpu().float(), "./intermediate_mask.png") return predictions def forward(self, embedding): diff --git a/backend/logs/file.log b/backend/logs/file.log index 763379a..01c6a6a 100644 --- a/backend/logs/file.log +++ b/backend/logs/file.log @@ -187,3 +187,90 @@ repeat_interleave() received an invalid combination of arguments - got (NoneType -file server.py --line 123 'image_path' +2023-06-16 09:31:17,785 - server_utils - ERROR - +-file server.py +--line 205 +OpenCV(4.7.0) :-1: error: (-5:Bad argument) in function 'imwrite' +> Overload resolution failed: +> - imwrite() missing required argument 'img' (pos 2) +> - imwrite() missing required argument 'img' (pos 2) + +2023-06-18 13:08:29,769 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-18 13:09:07,264 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. 
+2023-06-18 13:30:31,506 - server_utils - ERROR - +-file server.py +--line 164 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 13:31:23,511 - server_utils - ERROR - +-file server.py +--line 164 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 13:38:25,410 - server_utils - ERROR - +-file server.py +--line 164 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 16:14:17,440 - server_utils - ERROR - +-file server.py +--line 86 +shape +2023-06-18 16:30:00,838 - server_utils - ERROR - +-file server.py +--line 87 +shape +2023-06-18 16:30:02,893 - server_utils - ERROR - +-file server.py +--line 176 +not enough values to unpack (expected 4, got 3) +2023-06-18 16:45:12,270 - server_utils - ERROR - +-file server.py +--line 180 +not enough values to unpack (expected 4, got 3) +2023-06-18 18:07:02,734 - server_utils - ERROR - +-file server.py +--line 180 +not enough values to unpack (expected 4, got 3) +2023-06-18 18:10:00,569 - server_utils - ERROR - +-file server.py +--line 180 +Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same +2023-06-18 18:21:25,814 - server_utils - ERROR - +-file server.py +--line 213 +tuple index out of range +2023-06-18 18:22:54,478 - server_utils - ERROR - +-file server.py +--line 213 +tuple index out of range +2023-06-18 18:25:47,706 - server_utils - ERROR - +-file server.py +--line 219 +mat1 and mat2 shapes cannot be multiplied (6144x16 and 384x3) +2023-06-18 18:31:18,395 - server_utils - ERROR - +-file server.py +--line 209 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 18:33:31,478 - server_utils - ERROR - +-file server.py +--line 209 +stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor +2023-06-18 18:38:16,040 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-18 18:53:49,032 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-19 00:54:46,039 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-19 00:58:11,029 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-06-19 08:58:31,963 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. 
+2023-06-19 09:44:46,373 - server_utils - ERROR - +-file server.py +--line 245 +keywords must be strings +2023-06-20 15:42:13,948 - server_utils - ERROR - +-file server.py +--line 206 +int() argument must be a string, a bytes-like object or a real number, not 'dict' +2023-06-20 15:51:50,701 - server_utils - ERROR - +-file server.py +--line 173 +result type Float can't be cast to the desired output type Long +2023-06-20 17:24:52,125 - server_utils - ERROR - +-file server.py +--line 192 +only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices diff --git a/backend/server.py b/backend/server.py index 1abe61d..7b6f880 100644 --- a/backend/server.py +++ b/backend/server.py @@ -8,14 +8,12 @@ import json import time import queue -# import threading import server_utils as utils import annotations from exact_solution import ExactSolution -# from forwarder import Forwarder -from segment_anything import sam_model_registry, SamPredictor +from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator model_to_checkpoint_map = { @@ -130,73 +128,83 @@ def generate(gen_type): try: if gen_type in ["point", "annotation", "all"]: support_package = request.json['package'] + image_data = request.json['image'] image_path = request.json['image_path'] - for label_int, label_str in enumerate(support_package): - image = utils.get_image(image_data) - q_image_shape = image.shape - t0 = time.time() - predictor.set_image(image) - features = predictor.features - t1 = time.time() - print(f"SAM INFERENCE TIME: {t1-t0}") - linear_model_labels_int = [] - embedding_collection = [] + + all_labels = {elm: i for i, elm in enumerate(support_package.keys(), start=1)} + all_labels["background"] = 0 + linear_model_labels_int = [] + embedding_collection = [] + + image = utils.get_image(image_data) + predictor.set_image(image) + features = predictor.features + + generated_masks = mask_generator.generate(image) + + for label_int, label_str in enumerate(support_package, start=1): + for i, embedding in enumerate(support_package[label_str]["positive"][0]): embedding = np.array(embedding) embedding = torch.from_numpy(embedding).to(cfg.device)[0] - linear_model_labels_int.append(1) + linear_model_labels_int.append(all_labels[label_str]) embedding_collection.append(embedding) for i, embedding in enumerate(support_package[label_str]["negative"][0]): embedding = np.array(embedding) embedding = torch.from_numpy(embedding).to(cfg.device)[0] - linear_model_labels_int.append(0) + linear_model_labels_int.append(all_labels["background"]) embedding_collection.append(embedding) embedding_collection = torch.stack(embedding_collection, dim=0) linear_model_labels_int = np.array(linear_model_labels_int) - t0 = time.time() - linear_model = ExactSolution( - device=cfg.device, - embedding_collection=embedding_collection, - labels_int=linear_model_labels_int, - threshold=cfg.threshold - ) - predictions = linear_model.infer(features) - t1 = time.time() - print(f"EXACT SOLUTION INFERENCE TIME: {t1 - t0}") + t0 = time.time() + linear_model = ExactSolution( + device=cfg.device, + embedding_collection=embedding_collection, + labels_int=linear_model_labels_int, + threshold=cfg.threshold + ) + + predictions = linear_model.infer(features) + t1 = time.time() + print(f"EXACT SOLUTION INFERENCE TIME: {t1 - t0}") - yx_multi = (predictions == 1.).nonzero() # this can be changed later + matching_bboxes = [] + for label_int, label_str in enumerate(support_package, start=1): + yx_multi = 
(predictions == label_int).nonzero() for yx in yx_multi: - t0 = time.time() xy = utils.adapt_point( {"x": yx[1].item(), "y": yx[0].item()}, initial_shape=features.shape[-2:], final_shape=image.shape[0:2] ) - t1 = time.time() - print(f"ADAPT POINT TIME: {t1 - t0}") matching_points[label_str] = {"x": xy["x"], "y": xy["y"]} if gen_type == "point": return jsonify({'matching_points': matching_points, "error": error_text}) - l_ = np.ones((1,)) - t0 = time.time() - mask_, scores, logits = predictor.predict( - point_coords=np.array([[xy["x"], xy["y"]]]).astype(int), - point_labels=l_, - multimask_output=True, - ) - t1 = time.time() - print(f"MASK GEN INFERENCE TIME: {t1 - t0}") - mask_ = mask_.astype(np.uint8) - mask_ = cv2.resize(mask_[0], (q_image_shape[1], q_image_shape[0])) + matching_bboxes_ = [ + { + "id": cnt, + "bbox": [elm["bbox"][0], elm["bbox"][1], elm["bbox"][0] + elm["bbox"][2], elm["bbox"][1] + elm["bbox"][3]] + } + for cnt, elm in enumerate(generated_masks) if elm["segmentation"][int(xy["y"]), int(xy["x"])]] + + matching_bboxes = matching_bboxes + matching_bboxes_ + + unique_match_ids = np.unique([elm["id"] for elm in matching_bboxes]) + matching_bboxes = {elm["id"]: elm["bbox"] for elm in matching_bboxes if elm["id"] in unique_match_ids} + + for label_int, label_str in enumerate(support_package, start=1): + for match_id in matching_bboxes: + mask_ = generated_masks[int(match_id)]["segmentation"] * 1 + mask_ = mask_.astype(np.int8) polygons, points_ = annotations.generate_polygons_from_mask( polygons=polygons, @@ -207,27 +215,14 @@ def generate(gen_type): masks.append(mask_) - t0 = time.time() - # create xml file from the coordinates - if len(points_)>0: - for cnt_p, pts_ in enumerate(points_): - # coordinates = np.nonzero(mask_) - if len(pts_) >= 3: - print(f"---> Finding bounding box: {cnt_p}/{len(points_)}") - pts = np.array([np.array(pt) for pt in pts_]) - y0, y1, x0, x1 = pts[:, 0].min(), pts[:, 0].max(), pts[:, 1].min(), pts[:, 1].max() - bboxes.append({ - "coordinates": [int(x0), int(y0), int(x1), int(y1)], - "format": "xyxy", - "label": "label_str" - }) - - labels_str.append(label_str) - labels_int.append(label_int) - - t1 = time.time() - print(f"BBOX GENERATION TIME: {t1 - t0}") + bboxes.append({ + "coordinates": matching_bboxes[match_id], + "format": "xyxy", + "label": label_str + }) + labels_str.append(label_str) + labels_int.append(label_int) if len(masks) > 0: print("---> Merging masks") @@ -279,47 +274,6 @@ def generate(gen_type): return jsonify({"error": error_text}) -# @app.route('/forwarder/extract_features', methods=['POST']) -# def forwarder_extract(): -# -# timestamp = time.time() -# if not ExtractInQueue.full(): -# ExtractInQueue.put({ -# "url": f"http://{base}/extract_features", -# "data": request.json, -# "timestamp": timestamp -# }) -# else: -# return json.dumps({"warning": "Queue is full, please wait and try again."}) -# -# while True: -# if timestamp in ExtractOutDict: -# response = ExtractOutDict[timestamp]["data"] -# del ExtractOutDict[timestamp] -# break -# -# return response -# -# @app.route('/forwarder/generate/', methods=['POST']) -# def forwarder_generate(gen_type): -# -# timestamp = time.time() -# if not GenerateInQueue.full(): -# GenerateInQueue.put({ -# "url": f"http://{base}/generate/{gen_type}", -# "data": request.json, -# "timestamp": timestamp -# }) -# else: -# return json.dumps({"warning": "Queue is full, please wait and try again."}) -# -# while True: -# if timestamp in GenerateOutDict: -# response = GenerateOutDict[timestamp]["data"] -# del 
GenerateOutDict[timestamp] -# break -# -# return response def run_forwarder(forwarder): forwarder.run() @@ -334,6 +288,16 @@ def run_forwarder(forwarder): sam = sam_model_registry[cfg.model](checkpoint=checkpoint) sam.to(device=cfg.device) predictor = SamPredictor(sam) + + mask_generator = SamAutomaticMaskGenerator( + model=sam, + points_per_side=64, + pred_iou_thresh=0.9, + stability_score_thresh=0.92, + crop_n_layers=1, + crop_n_points_downscale_factor=2, + min_mask_region_area=100, # Requires open-cv to run post-processing + ) if not os.path.isdir("./backend/logs/"): os.makedirs("./backend/logs/") @@ -365,4 +329,5 @@ def run_forwarder(forwarder): # fp.start() # app.run(host = "0.0.0.0", port=8080, debug=False) + print("SERVER STARTING...") serve(app, host="0.0.0.0", port=8080) diff --git a/frontend_python/make_request.py b/frontend_python/make_request.py index 801de9c..46df98a 100644 --- a/frontend_python/make_request.py +++ b/frontend_python/make_request.py @@ -26,7 +26,7 @@ t1 = time.time() print(response.text) data_json = json.loads(response.text) -image_path = "../query_images/test.jpg" +image_path = "../query_images/bmw_i20_s_1.jpg" data_json["image_path"] = image_path init_image = utils.import_image(image_path) data_json["image"] = utils.numpy_to_base64(init_image) @@ -46,7 +46,7 @@ t2 = time.time() data_json = json.loads(response.text) masks = utils.get_image(data_json["masks"]) -masks.save("masks.png") +masks.save("../results/masks.png") # Print the response try: @@ -55,7 +55,7 @@ print(data_json) # save annotation -with open("test.json", "w") as fp: +with open("../results/bmw_i20_s_0.json", "w") as fp: json.dump(data_json["coco_json"], fp, indent=4) print(f"Request.1 Time: {t1-t0} | Request.2 Time: {t2-t1}") From dd80edf76aca984f76956c8f95225d5583fb50ce Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Sun, 23 Jul 2023 22:46:59 +0300 Subject: [PATCH 2/5] this definitely works better --- .gitignore | 2 +- backend/exact_solution.py | 46 ++++++++++---- backend/logs/file.log | 109 ++++++++++++++++++++++++++++++++ backend/server.py | 10 +-- config.yml | 3 +- frontend_python/make_request.py | 10 +-- 6 files changed, 159 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index 068aadd..81ea9c5 100644 --- a/.gitignore +++ b/.gitignore @@ -16,5 +16,5 @@ *.json *.onnx *.drawio -/dev_gitignored/ +/dev/ /frontend_python/ diff --git a/backend/exact_solution.py b/backend/exact_solution.py index 64e1439..66dd023 100644 --- a/backend/exact_solution.py +++ b/backend/exact_solution.py @@ -7,7 +7,7 @@ import torch import numpy as np import torchvision.utils - +import tqdm from server_utils import flatten_feature_map, l2_norm @@ -35,42 +35,66 @@ def __init__(self, super(ExactSolution, self).__init__() self.device = device - self.embedding_collection = embedding_collection + # self.loss_func = torch.nn.CrossEntropyLoss() + + self.embedding_collection = l2_norm(embedding_collection).to(self.device).float() self.threshold = threshold self.num_classes = len(np.unique(labels_int)) + # self.labels_int = torch.from_numpy(labels_int).to(self.device).long() self.labels_bin = binarize_labels(labels_int) self.linear = torch.nn.Linear(in_features=self.embedding_collection.shape[1], out_features=self.labels_bin.shape[1], bias=False) self.solve_exact() + # self.opt = torch.optim.SGD(momentum=0.9, lr=0.1, params=self.linear.parameters()) + # self.train() + # self.train_linear() self.eval() def solve_exact(self): - collection_inverse = 
torch.pinverse(l2_norm(self.embedding_collection)).float() + collection_inverse = torch.pinverse(self.embedding_collection) self.W = torch.matmul(collection_inverse.to(self.device), self.labels_bin.to(self.device)) with torch.no_grad(): self.linear.weight = torch.nn.Parameter(self.W.T) def infer(self, query_features): + with torch.no_grad(): + b, n, h, w = query_features.shape query_features = flatten_feature_map(query_features) query_features = l2_norm(query_features)[0] - predictions = self.forward(query_features.float()) + out = self.forward(query_features.float()) + + torchvision.utils.save_image(out[:, 1].reshape(b, h, w).squeeze(0).cpu().float(), "./intermediate_preds.png") + + # apply adaptive threshold + self.threshold = self.threshold if self.threshold <= out[:, 1].max() else out[:, 1].max() + print(f"ADAPTIVE THRESHOLD: {self.threshold}") + out = torch.where(out >= self.threshold, 1, 0) + + # get indexes of maximums + predictions = out.argmax(dim=-1) + predictions = predictions.reshape(b, h, w).squeeze(0) torchvision.utils.save_image(predictions.cpu().float(), "./intermediate_mask.png") + return predictions + def train_linear(self): + pbar = tqdm.tqdm(range(0, 200)) + for epoch in pbar: + out = self.linear(self.embedding_collection) + loss = self.loss_func(out, self.labels_int) + pbar.set_description(f"EPOCH: {epoch} | LOSS: {loss.item()}") + + self.opt.zero_grad() + loss.backward() + self.opt.step() + def forward(self, embedding): out = self.linear(embedding) out = torch.where(out > 1, 2-out, out) out = torch.nn.functional.softmax(out, dim=-1) - - # apply adaptive threshold - self.threshold = out[:, 1].max()-0.02 if self.threshold > out[:, 1].max() else self.threshold - out = torch.where(out >= self.threshold, 1, 0) - - # get indexes of maximums - out = out.argmax(dim=-1) return out diff --git a/backend/logs/file.log b/backend/logs/file.log index 01c6a6a..7521fd4 100644 --- a/backend/logs/file.log +++ b/backend/logs/file.log @@ -274,3 +274,112 @@ result type Float can't be cast to the desired output type Long -file server.py --line 192 only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices +2023-06-21 09:53:35,865 - server_utils - ERROR - +-file server.py +--line 166 +SGD.__init__() missing 1 required positional argument: 'params' +2023-06-21 09:54:29,877 - server_utils - ERROR - +-file server.py +--line 166 +SGD.__init__() missing 1 required positional argument: 'params' +2023-06-21 09:55:19,811 - server_utils - ERROR - +-file server.py +--line 166 +SGD.__init__() missing 1 required positional argument: 'params' +2023-06-21 09:57:55,476 - server_utils - ERROR - +-file server.py +--line 166 +SGD.__init__() missing 1 required positional argument: 'params' +2023-06-21 10:00:03,784 - server_utils - ERROR - +-file server.py +--line 166 +'ExactSolution' object has no attribute 'linear' +2023-06-21 10:01:00,648 - server_utils - ERROR - +-file server.py +--line 166 +'bool' object is not callable +2023-06-21 10:14:29,460 - server_utils - ERROR - +-file server.py +--line 166 +'bool' object is not callable +2023-06-21 10:24:17,893 - server_utils - ERROR - +-file server.py +--line 166 +expected scalar type Double but found Float +2023-06-21 10:25:31,981 - server_utils - ERROR - +-file server.py +--line 166 +"nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Int' +2023-06-21 10:28:20,393 - server_utils - ERROR - +-file server.py +--line 166 +to() received an invalid combination of arguments - got 
(torch.tensortype), but expected one of: + * (torch.device device, torch.dtype dtype, bool non_blocking, bool copy, *, torch.memory_format memory_format) + * (torch.dtype dtype, bool non_blocking, bool copy, *, torch.memory_format memory_format) + * (Tensor tensor, bool non_blocking, bool copy, *, torch.memory_format memory_format) + +2023-07-09 15:31:33,316 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-09 16:53:42,852 - server_utils - ERROR - +-file server.py +--line 173 +Dimension out of range (expected to be in range of [-1, 0], but got 2) +2023-07-09 17:06:05,739 - server_utils - ERROR - +-file server.py +--line 86 +'tuple' object has no attribute 'shape' +2023-07-09 17:07:25,830 - server_utils - ERROR - +-file server.py +--line 144 +repeat_interleave() received an invalid combination of arguments - got (tuple, int, dim=int), but expected one of: + * (Tensor input, Tensor repeats, int dim, *, int output_size) + * (Tensor repeats, *, int output_size) + didn't match because some of the keywords were incorrect: dim + * (Tensor input, int repeats, int dim, *, int output_size) + +2023-07-09 17:10:28,599 - server_utils - ERROR - +-file server.py +--line 144 +repeat_interleave() received an invalid combination of arguments - got (tuple, int, dim=int), but expected one of: + * (Tensor input, Tensor repeats, int dim, *, int output_size) + * (Tensor repeats, *, int output_size) + didn't match because some of the keywords were incorrect: dim + * (Tensor input, int repeats, int dim, *, int output_size) + +2023-07-09 17:12:17,006 - server_utils - ERROR - +-file server.py +--line 144 +repeat_interleave() received an invalid combination of arguments - got (tuple, int, dim=int), but expected one of: + * (Tensor input, Tensor repeats, int dim, *, int output_size) + * (Tensor repeats, *, int output_size) + didn't match because some of the keywords were incorrect: dim + * (Tensor input, int repeats, int dim, *, int output_size) + +2023-07-09 17:13:58,124 - server_utils - ERROR - +-file server.py +--line 144 +repeat_interleave() received an invalid combination of arguments - got (tuple, int, dim=int), but expected one of: + * (Tensor input, Tensor repeats, int dim, *, int output_size) + * (Tensor repeats, *, int output_size) + didn't match because some of the keywords were incorrect: dim + * (Tensor input, int repeats, int dim, *, int output_size) + +2023-07-09 17:15:06,438 - server_utils - ERROR - +-file server.py +--line 173 +shape '[1, 64, 64]' is invalid for input of size 8192 +2023-07-09 17:44:45,055 - server_utils - ERROR - +-file server.py +--line 78 +not enough values to unpack (expected 2, got 1) +2023-07-09 17:44:47,678 - server_utils - ERROR - +-file server.py +--line 142 +not enough values to unpack (expected 2, got 1) +2023-07-09 17:47:50,766 - server_utils - ERROR - +-file server.py +--line 78 +not enough values to unpack (expected 2, got 1) +2023-07-09 17:47:53,413 - server_utils - ERROR - +-file server.py +--line 142 +not enough values to unpack (expected 2, got 1) diff --git a/backend/server.py b/backend/server.py index 7b6f880..2b77238 100644 --- a/backend/server.py +++ b/backend/server.py @@ -4,7 +4,7 @@ import numpy as np import os import sys -import cv2 +import traceback import json import time import queue @@ -75,7 +75,7 @@ def extract(): image = utils.get_image(data["images"][image_id]) with torch.no_grad(): predictor.set_image(image) - features = predictor.features + _, features = predictor.features positive_coord = 
coordinates["positive"] negative_coord = coordinates["negative"] @@ -139,7 +139,7 @@ def generate(gen_type): image = utils.get_image(image_data) predictor.set_image(image) - features = predictor.features + _, features = predictor.features generated_masks = mask_generator.generate(image) @@ -270,6 +270,7 @@ def generate(gen_type): exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] error_text = f"{exc_type}\n-file {fname}\n--line {exc_tb.tb_lineno}\n{e}" + print(traceback.format_exc()) logger.error(error_text) return jsonify({"error": error_text}) @@ -287,10 +288,11 @@ def run_forwarder(forwarder): sam = sam_model_registry[cfg.model](checkpoint=checkpoint) sam.to(device=cfg.device) - predictor = SamPredictor(sam) + predictor = SamPredictor(sam, preconv_features=True) mask_generator = SamAutomaticMaskGenerator( model=sam, + preconv_features=True, points_per_side=64, pred_iou_thresh=0.9, stability_score_thresh=0.92, diff --git a/config.yml b/config.yml index dbdcb2d..25dfba8 100644 --- a/config.yml +++ b/config.yml @@ -11,8 +11,9 @@ labeling: polygon_resolution: 0.3 window_size: [1024, 1024] +preconv_features: False model: vit_h # Faster <<< vit_b, vit_l, vit_h >>> More accurate -threshold: 0.9 +threshold: 0.75 device: "cuda" COLORMAP: [[255, 0, 121],[101, 0, 255],[253, 255, 0],[0, 49, 255],[0, 186, 255],[0, 255, 39],[215, 0, 255],[0, 24, 255],[0, 157, 255],[0, 131, 255]] diff --git a/frontend_python/make_request.py b/frontend_python/make_request.py index 46df98a..a8dd0ab 100644 --- a/frontend_python/make_request.py +++ b/frontend_python/make_request.py @@ -26,7 +26,7 @@ t1 = time.time() print(response.text) data_json = json.loads(response.text) -image_path = "../query_images/bmw_i20_s_1.jpg" +image_path = "../query_images/bmw_emblem.jpg" data_json["image_path"] = image_path init_image = utils.import_image(image_path) data_json["image"] = utils.numpy_to_base64(init_image) @@ -45,8 +45,10 @@ response = requests.post(url, data=json.dumps(data_json), headers=headers) t2 = time.time() data_json = json.loads(response.text) -masks = utils.get_image(data_json["masks"]) -masks.save("../results/masks.png") + +if "masks" in data_json: + masks = utils.get_image(data_json["masks"]) + masks.save("../results/masks.png") # Print the response try: @@ -55,7 +57,7 @@ print(data_json) # save annotation -with open("../results/bmw_i20_s_0.json", "w") as fp: +with open("../results/bmw_emblem.json", "w") as fp: json.dump(data_json["coco_json"], fp, indent=4) print(f"Request.1 Time: {t1-t0} | Request.2 Time: {t2-t1}") From 2b29ef856b86d4d6879a6a949911706151fbffd3 Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Sun, 23 Jul 2023 23:39:57 +0300 Subject: [PATCH 3/5] faster inference --- backend/annotations.py | 8 ++-- backend/logs/file.log | 21 +++++++++ backend/server.py | 76 +++++++++------------------------ frontend_python/make_request.py | 4 +- 4 files changed, 48 insertions(+), 61 deletions(-) diff --git a/backend/annotations.py b/backend/annotations.py index c556e3d..45cbf7b 100644 --- a/backend/annotations.py +++ b/backend/annotations.py @@ -107,16 +107,16 @@ def generate_polygons_from_mask(polygons, mask, label, polygon_resolution): # Generate polygons from the contours points_ = [] - instances, num_instances = find_instances(mask) for k in range(1, num_instances+1, 1): instance = ((instances == k)*1).astype(np.uint8) # Find the contours in the binary mask contours, _ = cv2.findContours(instance, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + 
filtered_contours = [contour for contour in contours if cv2.contourArea(contour) >= 100] - for i, contour in enumerate(contours): - if int(len(contour)*polygon_resolution)>0: + for i, contour in enumerate(filtered_contours): + if int(len(contour)*polygon_resolution) > 2: points = contour.squeeze()[np.arange(0, len(contour), int(len(contour)/int(len(contour)*polygon_resolution)) @@ -128,7 +128,7 @@ def generate_polygons_from_mask(polygons, mask, label, polygon_resolution): "shape_type": "polygon", "flags": {} }) - points_.append(points) + points_.append(np.array(points)) return polygons, points_ diff --git a/backend/logs/file.log b/backend/logs/file.log index 7521fd4..6549dfc 100644 --- a/backend/logs/file.log +++ b/backend/logs/file.log @@ -383,3 +383,24 @@ not enough values to unpack (expected 2, got 1) -file server.py --line 142 not enough values to unpack (expected 2, got 1) +2023-07-23 23:06:58,600 - server_utils - ERROR - +-file server.py +--line 211 +setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part. +2023-07-23 23:26:27,032 - server_utils - ERROR - +-file server.py +--line 205 +too many values to unpack (expected 2) +2023-07-23 23:27:00,571 - server_utils - ERROR - +-file server.py +--line 215 +too many indices for array: array is 1-dimensional, but 2 were indexed +2023-07-23 23:28:47,726 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-23 23:30:29,986 - server_utils - ERROR - +-file server.py +--line 219 +name 'generated_masks' is not defined +2023-07-23 23:30:57,066 - server_utils - ERROR - +-file server.py +--line 256 +Object of type int32 is not JSON serializable diff --git a/backend/server.py b/backend/server.py index 2b77238..78c4d9b 100644 --- a/backend/server.py +++ b/backend/server.py @@ -9,6 +9,7 @@ import time import queue +import cv2 import server_utils as utils import annotations from exact_solution import ExactSolution @@ -138,11 +139,10 @@ def generate(gen_type): embedding_collection = [] image = utils.get_image(image_data) + query_image_shape = image.shape predictor.set_image(image) _, features = predictor.features - generated_masks = mask_generator.generate(image) - for label_int, label_str in enumerate(support_package, start=1): for i, embedding in enumerate(support_package[label_str]["positive"][0]): @@ -189,22 +189,17 @@ def generate(gen_type): if gen_type == "point": return jsonify({'matching_points': matching_points, "error": error_text}) - matching_bboxes_ = [ - { - "id": cnt, - "bbox": [elm["bbox"][0], elm["bbox"][1], elm["bbox"][0] + elm["bbox"][2], elm["bbox"][1] + elm["bbox"][3]] - } - for cnt, elm in enumerate(generated_masks) if elm["segmentation"][int(xy["y"]), int(xy["x"])]] - - matching_bboxes = matching_bboxes + matching_bboxes_ - - unique_match_ids = np.unique([elm["id"] for elm in matching_bboxes]) - matching_bboxes = {elm["id"]: elm["bbox"] for elm in matching_bboxes if elm["id"] in unique_match_ids} - - for label_int, label_str in enumerate(support_package, start=1): - for match_id in matching_bboxes: - mask_ = generated_masks[int(match_id)]["segmentation"] * 1 - mask_ = mask_.astype(np.int8) + l_ = np.ones((1,)) + t0 = time.time() + mask_, scores, logits = predictor.predict( + point_coords=np.array([[xy["x"], xy["y"]]]).astype(int), + point_labels=l_, + multimask_output=True, + ) + t1 = time.time() + print(f"MASK GEN INFERENCE TIME: {t1 - t0}") + mask_ = mask_.astype(np.uint8) + mask_ = 
cv2.resize(mask_[0], (query_image_shape[1], query_image_shape[0])) polygons, points_ = annotations.generate_polygons_from_mask( polygons=polygons, @@ -213,13 +208,15 @@ def generate(gen_type): polygon_resolution=cfg.labeling.polygon_resolution ) - masks.append(mask_) + for pt in points_: + + bboxes.append({ + "coordinates": [int(pt[:, 0].min()), int(pt[:, 1].min()), int(pt[:, 0].max()), int(pt[:, 1].max())], + "format": "xyxy", + "label": label_str + }) - bboxes.append({ - "coordinates": matching_bboxes[match_id], - "format": "xyxy", - "label": label_str - }) + masks.append(mask_) labels_str.append(label_str) labels_int.append(label_int) @@ -289,17 +286,6 @@ def run_forwarder(forwarder): sam = sam_model_registry[cfg.model](checkpoint=checkpoint) sam.to(device=cfg.device) predictor = SamPredictor(sam, preconv_features=True) - - mask_generator = SamAutomaticMaskGenerator( - model=sam, - preconv_features=True, - points_per_side=64, - pred_iou_thresh=0.9, - stability_score_thresh=0.92, - crop_n_layers=1, - crop_n_points_downscale_factor=2, - min_mask_region_area=100, # Requires open-cv to run post-processing - ) if not os.path.isdir("./backend/logs/"): os.makedirs("./backend/logs/") @@ -310,26 +296,6 @@ def run_forwarder(forwarder): logger = utils.get_logger(log_path='./backend/logs/file.log') - # forwarder_extract = Forwarder( - # in_queue=ExtractInQueue, - # out_dict=ExtractOutDict, - # freq=10 - # ) - # - # forwarder_generate = Forwarder( - # in_queue=GenerateInQueue, - # out_dict=GenerateOutDict, - # freq=10 - # ) - # - # fps = [] - # fps.append(threading.Thread(target=run_forwarder, args=(forwarder_extract,))) - # fps.append(threading.Thread(target=run_forwarder, args=(forwarder_generate,))) - # - # for fp in fps: - # fp.daemon = True - # fp.start() - # app.run(host = "0.0.0.0", port=8080, debug=False) print("SERVER STARTING...") serve(app, host="0.0.0.0", port=8080) diff --git a/frontend_python/make_request.py b/frontend_python/make_request.py index a8dd0ab..5aa91e6 100644 --- a/frontend_python/make_request.py +++ b/frontend_python/make_request.py @@ -26,7 +26,7 @@ t1 = time.time() print(response.text) data_json = json.loads(response.text) -image_path = "../query_images/bmw_emblem.jpg" +image_path = "../query_images/bmw_i20_s.jpg" data_json["image_path"] = image_path init_image = utils.import_image(image_path) data_json["image"] = utils.numpy_to_base64(init_image) @@ -57,7 +57,7 @@ print(data_json) # save annotation -with open("../results/bmw_emblem.json", "w") as fp: +with open("../results/bmw_i20_s.json", "w") as fp: json.dump(data_json["coco_json"], fp, indent=4) print(f"Request.1 Time: {t1-t0} | Request.2 Time: {t2-t1}") From 5722f023f7183ca6d5611488aaedc2bd23e76aad Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Mon, 24 Jul 2023 00:47:57 +0300 Subject: [PATCH 4/5] threshold is determined differently --- README.MD | 18 ++++++++++++++--- backend/exact_solution.py | 13 +++---------- backend/logs/file.log | 41 +++++++++++++++++++++++++++++++++++++++ backend/server.py | 14 ++++--------- 4 files changed, 63 insertions(+), 23 deletions(-) diff --git a/README.MD b/README.MD index 2bb4445..f89afd5 100644 --- a/README.MD +++ b/README.MD @@ -36,15 +36,27 @@ More accurate <<< [VIT-H](https://dl.fbaipublicfiles.com/segment_anything/sam_vi | [VIT-L](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth) | [VIT-B](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth) >>> Faster -## RUN +## START SERVER Before you start the application, create a folder to 
put your support images that will be used to learn from, then create a folder to put your query images that are going to be labeled. Put the relative path to the folders to support_dir and query_dir in config.yml. -Then, let the magic begin ... + +#### To create request json that will be sent to server +```commandline +python interface.py +``` +then adjust make_request.py according to your images and paths + +Finally, run the server ... ``` -python main.py +python backend/server.py +``` + +and make request while server.py is running +```commandline +python make_request.py ``` ## DOCKERIZATION diff --git a/backend/exact_solution.py b/backend/exact_solution.py index 66dd023..963ef99 100644 --- a/backend/exact_solution.py +++ b/backend/exact_solution.py @@ -67,18 +67,11 @@ def infer(self, query_features): query_features = l2_norm(query_features)[0] out = self.forward(query_features.float()) - torchvision.utils.save_image(out[:, 1].reshape(b, h, w).squeeze(0).cpu().float(), "./intermediate_preds.png") - - # apply adaptive threshold - self.threshold = self.threshold if self.threshold <= out[:, 1].max() else out[:, 1].max() - print(f"ADAPTIVE THRESHOLD: {self.threshold}") - out = torch.where(out >= self.threshold, 1, 0) - # get indexes of maximums - predictions = out.argmax(dim=-1) + predictions = out[:, 1] - predictions = predictions.reshape(b, h, w).squeeze(0) - torchvision.utils.save_image(predictions.cpu().float(), "./intermediate_mask.png") + predictions = predictions.reshape(h, w) + torchvision.utils.save_image(predictions.cpu().float(), "./intermediate_preds.png") return predictions diff --git a/backend/logs/file.log b/backend/logs/file.log index 6549dfc..15ad39a 100644 --- a/backend/logs/file.log +++ b/backend/logs/file.log @@ -404,3 +404,44 @@ name 'generated_masks' is not defined -file server.py --line 256 Object of type int32 is not JSON serializable +2023-07-23 23:48:38,345 - server_utils - ERROR - +-file server.py +--line 75 +not enough values to unpack (expected 2, got 1) +2023-07-23 23:48:40,966 - server_utils - ERROR - +-file server.py +--line 140 +not enough values to unpack (expected 2, got 1) +2023-07-23 23:49:26,722 - server_utils - ERROR - +-file server.py +--line 75 +not enough values to unpack (expected 2, got 1) +2023-07-23 23:49:29,337 - server_utils - ERROR - +-file server.py +--line 162 +'list' object has no attribute 'norm' +2023-07-23 23:50:14,078 - server_utils - ERROR - +-file server.py +--line 75 +not enough values to unpack (expected 2, got 1) +2023-07-23 23:59:24,235 - server_utils - ERROR - +-file server.py +--line 169 +can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first. +2023-07-24 00:08:36,355 - server_utils - ERROR - +-file server.py +--line 169 +Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [64, 64] +2023-07-24 00:09:47,127 - server_utils - ERROR - +-file server.py +--line 169 +Input type (torch.cuda.LongTensor) and weight type (torch.FloatTensor) should be the same +2023-07-24 00:10:29,927 - server_utils - ERROR - +-file server.py +--line 169 +Input type (torch.cuda.LongTensor) and weight type (torch.cuda.FloatTensor) should be the same +2023-07-24 00:13:24,336 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-24 00:14:05,916 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. 
+2023-07-24 00:15:43,198 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-24 00:19:44,234 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. +2023-07-24 00:21:20,447 - server_utils - ERROR - NO MASK IS GENERATED FOR THIS IMAGE BASED ON THE GIVEN COORDINATES. diff --git a/backend/server.py b/backend/server.py index 78c4d9b..50a4925 100644 --- a/backend/server.py +++ b/backend/server.py @@ -23,10 +23,6 @@ "vit_h": "./checkpoints/sam_vit_h_4b8939.pth", } base = "localhost:8080" -ExtractInQueue = queue.Queue(maxsize=10) -ExtractOutDict = {} -GenerateInQueue = queue.Queue(maxsize=4) -GenerateOutDict = {} app = Flask(__name__) @app.route('/health', methods=['GET']) @@ -174,10 +170,12 @@ def generate(gen_type): t1 = time.time() print(f"EXACT SOLUTION INFERENCE TIME: {t1 - t0}") - matching_bboxes = [] for label_int, label_str in enumerate(support_package, start=1): - yx_multi = (predictions == label_int).nonzero() + threshold = predictions.flatten().sort(descending=True)[0][0:5] + print(f"HIGHEST 5 CONFIDENCES: {threshold}") + threshold = threshold[4] + yx_multi = (predictions >= threshold).nonzero() for yx in yx_multi: xy = utils.adapt_point( {"x": yx[1].item(), "y": yx[0].item()}, @@ -273,10 +271,6 @@ def generate(gen_type): return jsonify({"error": error_text}) -def run_forwarder(forwarder): - forwarder.run() - - if __name__ == '__main__': cfg = utils.load_config("./config.yml") From ba925c6d1a5b22985db89114010894f194290e32 Mon Sep 17 00:00:00 2001 From: Tekin Evrim Ozmermer Date: Mon, 24 Jul 2023 00:49:13 +0300 Subject: [PATCH 5/5] readme --- README.MD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.MD b/README.MD index f89afd5..67021b0 100644 --- a/README.MD +++ b/README.MD @@ -20,14 +20,14 @@ pip install git+https://github.com/facebookresearch/segment-anything.git or clone the repository locally and install with ``` -git clone git@github.com:facebookresearch/segment-anything.git +git clone git@github.com:rootvisionai/segment-anything.git cd segment-anything; pip install -e . ``` The following dependencies are necessary for the FEWSAM: ``` -pip install opencv-python PyYAML PySimpleGUI kmeans-pytorch +pip install opencv-python PyYAML PySimpleGUI ``` Now download the model checkpoints:
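
The core idea the series keeps refining in `backend/exact_solution.py` is a linear head that is solved in closed form rather than trained: the weights are the Moore-Penrose pseudoinverse of the L2-normalized support embeddings multiplied by the binarized labels. Below is a minimal standalone sketch of that step, with a one-hot stand-in for the project's `binarize_labels` helper, which these patches never show:

```python
import torch

def binarize_labels(labels_int: torch.Tensor) -> torch.Tensor:
    # stand-in for the project's helper: integer labels -> one-hot float matrix
    return torch.nn.functional.one_hot(labels_int).float()

def solve_exact(embeddings: torch.Tensor, labels_int: torch.Tensor) -> torch.Tensor:
    # closed-form least-squares weights: W = pinv(X) @ Y,
    # so class scores for new embeddings are simply X_new @ W
    X = torch.nn.functional.normalize(embeddings, dim=-1)  # row-wise L2 norm
    Y = binarize_labels(labels_int)
    return torch.linalg.pinv(X) @ Y

torch.manual_seed(0)
X = torch.randn(8, 16)                      # 8 support embeddings, 16-dim
y = torch.tensor([0, 1, 0, 1, 0, 1, 0, 1])  # positive/negative click labels
W = solve_exact(X, y)
scores = torch.nn.functional.normalize(X, dim=-1) @ W
print(scores.argmax(dim=-1))  # exact fit here, since N <= D with independent rows
```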
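Patch 4 swaps the absolute softmax threshold for a rank-based one: a pixel survives if its confidence reaches the fifth-highest score in the map. The same selection in isolation, using `topk` in place of the patch's sort-then-index, to which it is equivalent:

```python
import torch

def top_k_candidate_points(confidence: torch.Tensor, k: int = 5) -> torch.Tensor:
    # keep (y, x) indices whose score reaches the k-th highest confidence;
    # rank-based, so at least k points survive (more on ties) at any score scale
    kth_score = confidence.flatten().topk(k).values[-1]
    return (confidence >= kth_score).nonzero()

confidence = torch.rand(64, 64)            # stand-in for the (h, w) prediction map
print(top_k_candidate_points(confidence))  # tensor of shape (>= 5, 2)
```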
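After patch 3, bounding boxes come from the simplified polygons rather than from mask-generator records. A condensed sketch of the `annotations.py`/`server.py` interplay, assuming a single-instance binary mask; the real code first splits the mask into instances with a `find_instances` helper that the diffs do not include:

```python
import cv2
import numpy as np

def mask_to_polygons_and_boxes(mask, resolution=0.3, min_area=100.0):
    # subsample each sufficiently large contour, then box it in xyxy format;
    # `resolution` is the fraction of contour points kept
    # (cfg.labeling.polygon_resolution in config.yml)
    contours, _ = cv2.findContours(mask.astype(np.uint8),
                                   cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    polygons, boxes = [], []
    for contour in (c for c in contours if cv2.contourArea(c) >= min_area):
        n_keep = int(len(contour) * resolution)
        if n_keep > 2:  # a polygon needs at least three vertices
            pts = contour.squeeze()[::max(1, len(contour) // n_keep)]  # (M, 2), (x, y)
            polygons.append(pts)
            boxes.append([int(pts[:, 0].min()), int(pts[:, 1].min()),
                          int(pts[:, 0].max()), int(pts[:, 1].max())])
    return polygons, boxes

mask = np.zeros((128, 128), dtype=np.uint8)
cv2.circle(mask, (64, 64), 30, 1, -1)       # one synthetic instance
print(mask_to_polygons_and_boxes(mask)[1])  # roughly [[34, 34, 94, 94]]
```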
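For reference, patch 1's mask-matching step (removed again in patch 3) kept every `SamAutomaticMaskGenerator` record whose segmentation covered a matched point and converted its XYWH `bbox` to xyxy. The selection logic on its own, with a fabricated record standing in for the generator's output:

```python
import numpy as np

def masks_covering_point(generated_masks, x, y):
    # generated_masks: list of dicts in SamAutomaticMaskGenerator's output
    # format, each with a boolean "segmentation" array and an XYWH "bbox"
    boxes = []
    for record in generated_masks:
        if record["segmentation"][int(y), int(x)]:
            bx, by, bw, bh = record["bbox"]
            boxes.append([bx, by, bx + bw, by + bh])  # XYWH -> xyxy
    return boxes

seg = np.zeros((32, 32), dtype=bool)
seg[8:16, 8:16] = True
fake = [{"segmentation": seg, "bbox": [8, 8, 8, 8]}]  # stand-in record
print(masks_covering_point(fake, x=10, y=10))         # -> [[8, 8, 16, 16]]
```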