open README.md with unicode (to support Hugging Face emoji); fix various typos #218

Merged · 2 commits · Aug 14, 2024
2 changes: 1 addition & 1 deletion sam2/modeling/position_encoding.py
@@ -16,7 +16,7 @@
 class PositionEmbeddingSine(nn.Module):
     """
     This is a more standard version of the position embedding, very similar to the one
-    used by the Attention is all you need paper, generalized to work on images.
+    used by the Attention Is All You Need paper, generalized to work on images.
     """

     def __init__(
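Reviewer note: for anyone skimming the diff, the docstring describes the 2D generalization of the sinusoidal embedding from Attention Is All You Need. A minimal standalone sketch of the idea (illustrative only; the repo's class additionally handles normalization, scaling, and caching):

```python
import torch

def sine_position_embedding_2d(h: int, w: int, num_pos_feats: int = 128,
                               temperature: float = 10000.0) -> torch.Tensor:
    """Sinusoidal embeddings generalized to a 2D grid: half of the channels
    encode the y coordinate, the other half the x coordinate."""
    y = torch.arange(h, dtype=torch.float32).unsqueeze(1).expand(h, w)
    x = torch.arange(w, dtype=torch.float32).unsqueeze(0).expand(h, w)
    # Per-channel frequencies, as in the original 1D formulation.
    dim_t = temperature ** (2 * (torch.arange(num_pos_feats) // 2) / num_pos_feats)
    pos_x = x[..., None] / dim_t  # [H, W, num_pos_feats]
    pos_y = y[..., None] / dim_t
    # Interleave sin/cos over the channel dimension.
    pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=3).flatten(2)
    pos_y = torch.stack((pos_y[..., 0::2].sin(), pos_y[..., 1::2].cos()), dim=3).flatten(2)
    return torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1)  # [2*num_pos_feats, H, W]
```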
2 changes: 1 addition & 1 deletion sam2/modeling/sam2_base.py
@@ -642,7 +642,7 @@ def _prepare_memory_conditioned_features(
                 pix_feat_with_mem = pix_feat_with_mem.permute(1, 2, 0).view(B, C, H, W)
                 return pix_feat_with_mem

-            # Use a dummy token on the first frame (to avoid emtpy memory input to tranformer encoder)
+            # Use a dummy token on the first frame (to avoid empty memory input to tranformer encoder)
             to_cat_memory = [self.no_mem_embed.expand(1, B, self.mem_dim)]
             to_cat_memory_pos_embed = [self.no_mem_pos_enc.expand(1, B, self.mem_dim)]
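Context for the fixed comment: on the first frame there is no memory yet, and the memory-attention encoder cannot take an empty memory sequence, so a single learned "no memory" token stands in. A toy sketch of the pattern (hypothetical standalone names, not the repo's module):

```python
import torch
import torch.nn as nn

B, mem_dim = 2, 64  # hypothetical batch size and memory-token width

# A single learned placeholder, stored as [1, 1, mem_dim] and broadcast to the batch.
no_mem_embed = nn.Parameter(torch.zeros(1, 1, mem_dim))
no_mem_pos_enc = nn.Parameter(torch.zeros(1, 1, mem_dim))

# On the first frame the memory list would otherwise be empty; seed it with
# the dummy token so the encoder always sees at least one memory entry.
to_cat_memory = [no_mem_embed.expand(1, B, mem_dim)]
to_cat_memory_pos_embed = [no_mem_pos_enc.expand(1, B, mem_dim)]

memory = torch.cat(to_cat_memory, dim=0)  # [num_mem_tokens, B, mem_dim]
print(memory.shape)  # torch.Size([1, 2, 64])
```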
2 changes: 1 addition & 1 deletion sam2/sam2_image_predictor.py
@@ -183,7 +183,7 @@ def predict_batch(
         normalize_coords=True,
     ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
         """This function is very similar to predict(...), however it is used for batched mode, when the model is expected to generate predictions on multiple images.
-        It returns a tupele of lists of masks, ious, and low_res_masks_logits.
+        It returns a tuple of lists of masks, ious, and low_res_masks_logits.
         """
         assert self._is_batch, "This function should only be used when in batched mode"
         if not self._is_image_set:
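As a usage note on the corrected docstring: the three lists in the returned tuple are parallel to the input images. A hedged sketch (the config/checkpoint names are placeholders; use a real pair from the repo's README):

```python
import numpy as np
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# Placeholder config/checkpoint names; substitute a real pair from the README.
predictor = SAM2ImagePredictor(build_sam2("sam2_hiera_l.yaml", "sam2_hiera_large.pt"))

images = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(2)]
predictor.set_image_batch(images)  # batched counterpart of set_image

# One positive point prompt per image; the three returned lists are parallel
# to `images`: masks, IoU predictions, and low-resolution mask logits.
masks_list, ious_list, low_res_logits_list = predictor.predict_batch(
    point_coords_batch=[np.array([[320, 240]])] * 2,
    point_labels_batch=[np.array([1])] * 2,
    multimask_output=True,
)
assert len(masks_list) == len(ious_list) == len(low_res_logits_list) == len(images)
```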
6 changes: 3 additions & 3 deletions sam2/sam2_video_predictor.py
@@ -44,7 +44,7 @@ def init_state(
         offload_state_to_cpu=False,
         async_loading_frames=False,
     ):
-        """Initialize a inference state."""
+        """Initialize an inference state."""
         compute_device = self.device  # device of the model
         images, video_height, video_width = load_video_frames(
             video_path=video_path,
@@ -589,7 +589,7 @@ def propagate_in_video_preflight(self, inference_state):
         # to `propagate_in_video_preflight`).
         consolidated_frame_inds = inference_state["consolidated_frame_inds"]
         for is_cond in [False, True]:
-            # Separately consolidate conditioning and non-conditioning temp outptus
+            # Separately consolidate conditioning and non-conditioning temp outputs
             storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
             # Find all the frames that contain temporary outputs for any objects
             # (these should be the frames that have just received clicks for mask inputs
@@ -598,7 +598,7 @@ def propagate_in_video_preflight(self, inference_state):
             for obj_temp_output_dict in temp_output_dict_per_obj.values():
                 temp_frame_inds.update(obj_temp_output_dict[storage_key].keys())
             consolidated_frame_inds[storage_key].update(temp_frame_inds)
-            # consolidate the temprary output across all objects on this frame
+            # consolidate the temporary output across all objects on this frame
             for frame_idx in temp_frame_inds:
                 consolidated_out = self._consolidate_temp_output_across_obj(
                     inference_state, frame_idx, is_cond=is_cond, run_mem_encoder=True
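The two hunks above follow a gather-then-consolidate pattern: first union the frame indices that hold temporary per-object outputs, then consolidate each such frame across all objects. A stripped-down illustration, using a hypothetical dict layout that mirrors the variable names in the diff:

```python
# Per-object temporary outputs, keyed by frame index (illustrative layout).
temp_output_dict_per_obj = {
    0: {"cond_frame_outputs": {3: "obj0@3"}, "non_cond_frame_outputs": {}},
    1: {"cond_frame_outputs": {3: "obj1@3", 7: "obj1@7"}, "non_cond_frame_outputs": {}},
}

for is_cond in (False, True):
    storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
    # Union of all frames that have temporary outputs for any object.
    temp_frame_inds = set()
    for obj_temp_output_dict in temp_output_dict_per_obj.values():
        temp_frame_inds.update(obj_temp_output_dict[storage_key].keys())
    # Consolidate each such frame across all objects.
    for frame_idx in sorted(temp_frame_inds):
        per_obj = [d[storage_key].get(frame_idx) for d in temp_output_dict_per_obj.values()]
        print(is_cond, frame_idx, per_obj)  # stand-in for _consolidate_temp_output_across_obj
```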
4 changes: 2 additions & 2 deletions sam2/utils/misc.py
@@ -68,7 +68,7 @@ def mask_to_box(masks: torch.Tensor):
     compute bounding box given an input mask

     Inputs:
-    - masks: [B, 1, H, W] boxes, dtype=torch.Tensor
+    - masks: [B, 1, H, W] masks, dtype=torch.Tensor

     Returns:
     - box_coords: [B, 1, 4], contains (x, y) coordinates of top left and bottom right box corners, dtype=torch.Tensor
@@ -120,7 +120,7 @@ def __init__(
         self.offload_video_to_cpu = offload_video_to_cpu
         self.img_mean = img_mean
         self.img_std = img_std
-        # items in `self._images` will be loaded asynchronously
+        # items in `self.images` will be loaded asynchronously
         self.images = [None] * len(img_paths)
         # catch and raise any exceptions in the async loading thread
         self.exception = None
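For context on the corrected docstring, here is a minimal sketch of what a mask-to-box conversion does (a plain loop-based reimplementation for illustration, not the repo's vectorized version; it assumes each mask has at least one foreground pixel):

```python
import torch

def mask_to_box_sketch(masks: torch.Tensor) -> torch.Tensor:
    """Toy version: masks is [B, 1, H, W] (bool); returns [B, 1, 4] boxes as
    (x_min, y_min, x_max, y_max) over the foreground pixels of each mask."""
    B, _, H, W = masks.shape
    xs = torch.arange(W, device=masks.device)
    ys = torch.arange(H, device=masks.device)
    boxes = []
    for b in range(B):
        m = masks[b, 0]
        cols, rows = m.any(dim=0), m.any(dim=1)  # occupied columns / rows
        x_min, x_max = xs[cols].min(), xs[cols].max()
        y_min, y_max = ys[rows].min(), ys[rows].max()
        boxes.append(torch.stack([x_min, y_min, x_max, y_max]))
    return torch.stack(boxes).view(B, 1, 4)
```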
2 changes: 1 addition & 1 deletion sav_dataset/sav_evaluator.py
@@ -72,7 +72,7 @@
     parser.add_argument(
         "--do_not_skip_first_and_last_frame",
         help="In SA-V val and test, we skip the first and the last annotated frames in evaluation. "
-        "Set this to true for evaluation on settings that doen't skip first and last frames",
+        "Set this to true for evaluation on settings that doesn't skip first and last frames",
         action="store_true",
     )

2 changes: 1 addition & 1 deletion sav_dataset/utils/sav_benchmark.py
@@ -183,7 +183,7 @@ def _seg2bmap(seg, width=None, height=None):

     assert not (
         width > w | height > h | abs(ar1 - ar2) > 0.01
-    ), "Can" "t convert %dx%d seg to %dx%d bmap." % (w, h, width, height)
+    ), "Cannot convert %dx%d seg to %dx%d bmap." % (w, h, width, height)

     e = np.zeros_like(seg)
     s = np.zeros_like(seg)
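A side note on the condition above, which this PR does not touch: Python's bitwise `|` binds more tightly than comparison operators, so `width > w | height > h | abs(ar1 - ar2) > 0.01` parses as the chained comparison `width > (w | height) > (h | abs(ar1 - ar2)) > 0.01`, not the intended disjunction. A boolean form would express the intent directly:

```python
# Intended check, written with boolean `or` so each comparison binds first.
# A parenthesized bitwise form, (width > w) | (height > h) | ..., would also work.
assert not (
    width > w or height > h or abs(ar1 - ar2) > 0.01
), "Cannot convert %dx%d seg to %dx%d bmap." % (w, h, width, height)
```

The PR only corrects the assertion message; the condition itself is left as-is.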
2 changes: 1 addition & 1 deletion setup.py
@@ -17,7 +17,7 @@
 LICENSE = "Apache 2.0"

 # Read the contents of README file
-with open("README.md", "r") as f:
+with open("README.md", "r", encoding="utf-8") as f:
     LONG_DESCRIPTION = f.read()

 # Required dependencies
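On the setup.py change, the PR's headline fix: without an explicit encoding, `open()` decodes with the platform's locale encoding, which on Windows is often cp1252 and cannot decode the Hugging Face emoji (🤗) in README.md, so installing from source can fail with a `UnicodeDecodeError`. A small sketch of the behavior:

```python
import locale

# The default used by open() when no encoding is given; commonly "cp1252" on
# Windows and "utf-8" on most Linux/macOS setups.
print(locale.getpreferredencoding(False))

# Can fail on Windows-like locales because README.md contains the 🤗 emoji:
#     with open("README.md", "r") as f:          # UnicodeDecodeError
# Passing an explicit encoding makes the read deterministic on every platform:
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()
```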