diff --git a/kernels/aten/functions.yaml b/kernels/aten/functions.yaml
index ebcd86d851..0024c676ef 100644
--- a/kernels/aten/functions.yaml
+++ b/kernels/aten/functions.yaml
@@ -405,6 +405,8 @@
 - op: unsqueeze_copy.out
 
+- op: upsample_bilinear2d.vec_out
+
 - op: upsample_nearest2d.out
 
 - op: upsample_nearest2d.vec_out
 
diff --git a/kernels/portable/cpu/op_upsample_bilinear2d.cpp b/kernels/portable/cpu/op_upsample_bilinear2d.cpp
new file mode 100644
index 0000000000..ea2ff86b31
--- /dev/null
+++ b/kernels/portable/cpu/op_upsample_bilinear2d.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/kernels/portable/cpu/util/upsample_util.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+namespace torch {
+namespace executor {
+namespace native {
+
+using exec_aten::ArrayRef;
+using exec_aten::optional;
+using exec_aten::SizesType;
+
+namespace {
+template <typename CTYPE>
+void upsample_bilinear2d_kernel_impl(
+    const Tensor& in,
+    bool align_corners,
+    const float scale_h,
+    const float scale_w,
+    Tensor& out) {
+  const auto in_data = in.const_data_ptr<CTYPE>();
+  auto out_data = out.mutable_data_ptr<CTYPE>();
+
+  auto in_plane = in_data;
+  for (auto n = 0; n < out.size(0); n++) {
+    for (auto c = 0; c < out.size(1); c++) {
+      for (auto h = 0; h < out.size(2); h++) {
+        // Compute source index and weights.
+        int64_t in_h1, in_h2;
+        float weight_h, inv_weight_h;
+
+        compute_source_index_and_lambda(
+            in_h1,
+            in_h2,
+            weight_h,
+            inv_weight_h,
+            scale_h,
+            h,
+            in.sizes()[2],
+            out.sizes()[2],
+            align_corners);
+
+        for (auto w = 0; w < out.size(3); w++) {
+          int64_t in_w1, in_w2;
+          float weight_w, inv_weight_w;
+
+          compute_source_index_and_lambda(
+              in_w1,
+              in_w2,
+              weight_w,
+              inv_weight_w,
+              scale_w,
+              w,
+              in.sizes()[3],
+              out.sizes()[3],
+              align_corners);
+
+          const auto top_left =
+              in_plane[in_h1 * in.strides()[2] + in_w1 * in.strides()[3]];
+          const auto top_right =
+              in_plane[in_h1 * in.strides()[2] + in_w2 * in.strides()[3]];
+          const auto bottom_left =
+              in_plane[in_h2 * in.strides()[2] + in_w1 * in.strides()[3]];
+          const auto bottom_right =
+              in_plane[in_h2 * in.strides()[2] + in_w2 * in.strides()[3]];
+
+          const auto top = top_left * weight_w + top_right * inv_weight_w;
+          const auto bottom =
+              bottom_left * weight_w + bottom_right * inv_weight_w;
+          const auto val = top * weight_h + bottom * inv_weight_h;
+
+          *out_data = val;
+          out_data++;
+        }
+      }
+
+      in_plane += in.strides()[1];
+    }
+  }
+}
+} // namespace
+
+// Signatures are auto-generated, so disable pass-by-value lint.
+// NOLINTBEGIN(facebook-hte-ConstantArgumentPassByValue,
+// facebook-hte-ParameterMightThrowOnCopy)
+Tensor& upsample_bilinear2d_vec_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& in,
+    const exec_aten::OptionalArrayRef<int64_t> output_size,
+    bool align_corners,
+    const exec_aten::OptionalArrayRef<double> scale_factors,
+    Tensor& out) {
+  // Preconditions (checked in check_..._args):
+  //  - In and out tensors have the same dtype.
+  //  - In and out tensors are rank 4 and have the same dim[0] and dim[1].
+  //  - In and out tensors use the default dim order (NCHW).
+  ET_KERNEL_CHECK(
+      ctx,
+      check_upsample_bilinear2d_args(
+          in, output_size, align_corners, scale_factors, out),
+      InvalidArgument,
+      out);
+
+  double scale_h, scale_w;
+
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      resize_upsample_2d(
+          in, output_size, scale_factors, scale_h, scale_w, out) == Error::Ok,
+      InvalidArgument,
+      out,
+      "Failed to resize output tensor");
+
+  const auto kernel_scale_h = area_pixel_compute_scale<double>(
+      in.sizes()[2], out.sizes()[2], align_corners, scale_h);
+  const auto kernel_scale_w = area_pixel_compute_scale<double>(
+      in.sizes()[3], out.sizes()[3], align_corners, scale_w);
+
+  ET_SWITCH_REAL_TYPES(
+      in.scalar_type(), ctx, "upsample_bilinear2d.out", CTYPE, [&]() {
+        upsample_bilinear2d_kernel_impl<CTYPE>(
+            in, align_corners, kernel_scale_h, kernel_scale_w, out);
+      });
+
+  return out;
+}
+// NOLINTEND(facebook-hte-ConstantArgumentPassByValue,
+// facebook-hte-ParameterMightThrowOnCopy)
+
+} // namespace native
+} // namespace executor
+} // namespace torch
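For context (illustrative only, not part of the diff): the kernel above walks the output in NCHW order and, for each output pixel, blends the four nearest input pixels using the lambdas produced by `compute_source_index_and_lambda`. A rough Python sketch of the same per-plane computation, assuming a contiguous plane and `align_corners=False`:

```python
import math


def source_index_and_lambda(ratio, out_idx, in_size):
    # Mirrors compute_source_index_and_lambda for align_corners=False.
    real = max(ratio * (out_idx + 0.5) - 0.5, 0.0)
    i0 = min(int(math.floor(real)), in_size - 1)
    frac = min(max(real - i0, 0.0), 1.0)
    i1 = i0 + 1 if i0 < in_size - 1 else i0
    return i0, i1, 1.0 - frac, frac


def bilinear_plane(in_plane, in_h, in_w, out_h, out_w):
    # One (n, c) plane; the C++ kernel repeats this per plane of the batch.
    scale_h, scale_w = in_h / out_h, in_w / out_w
    out = [[0.0] * out_w for _ in range(out_h)]
    for h in range(out_h):
        h0, h1, wh0, wh1 = source_index_and_lambda(scale_h, h, in_h)
        for w in range(out_w):
            w0, w1, ww0, ww1 = source_index_and_lambda(scale_w, w, in_w)
            top = in_plane[h0][w0] * ww0 + in_plane[h0][w1] * ww1
            bottom = in_plane[h1][w0] * ww0 + in_plane[h1][w1] * ww1
            out[h][w] = top * wh0 + bottom * wh1
    return out


print(bilinear_plane([[1.0, 4.0]], 1, 2, 1, 4))  # [[1.0, 1.75, 3.25, 4.0]]
```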
diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl
index 1dc36afce2..3e8072fef6 100644
--- a/kernels/portable/cpu/util/targets.bzl
+++ b/kernels/portable/cpu/util/targets.bzl
@@ -31,6 +31,7 @@ def define_common_targets():
             "//executorch/kernels/portable/cpu/util:advanced_index_util",
             "//executorch/kernels/portable/cpu/util:slice_util",
             "//executorch/kernels/portable/cpu/util:elementwise_util",
+            "//executorch/kernels/portable/cpu/util:upsample_util",
         ],
         visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
     )
@@ -266,6 +267,16 @@
         visibility = ["//executorch/kernels/portable/cpu/..."],
     )
 
+    runtime.cxx_library(
+        name = "upsample_util",
+        srcs = ["upsample_util.cpp"],
+        exported_headers = ["upsample_util.h"],
+        deps = [
+            "//executorch/runtime/kernel:kernel_includes",
+        ],
+        visibility = ["//executorch/kernels/portable/cpu/..."],
+    )
+
     # Utility functions that can be used by operators that perform reduction
     for aten_mode in [True, False]:
         suffix = "_aten" if aten_mode else ""
diff --git a/kernels/portable/cpu/util/upsample_util.cpp b/kernels/portable/cpu/util/upsample_util.cpp
new file mode 100644
index 0000000000..2082b632d9
--- /dev/null
+++ b/kernels/portable/cpu/util/upsample_util.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */ + +#include +#include + +namespace torch { +namespace executor { + +bool check_upsample_2d_common_args( + const Tensor& in, + const exec_aten::OptionalArrayRef& output_size, + const exec_aten::OptionalArrayRef& scale_factors, + Tensor& out) { + ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); + ET_LOG_AND_RETURN_IF_FALSE(in.dim() == 4); + ET_LOG_AND_RETURN_IF_FALSE(out.dim() == 4); + ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(in)); + ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(out)); + ET_LOG_AND_RETURN_IF_FALSE( + output_size.has_value() ^ scale_factors.has_value()); + if (scale_factors.has_value()) { + ET_LOG_AND_RETURN_IF_FALSE(scale_factors.value().size() == 2); + ET_LOG_AND_RETURN_IF_FALSE(scale_factors.value()[0] > 0); + ET_LOG_AND_RETURN_IF_FALSE(scale_factors.value()[1] > 0); + } else if (output_size.has_value()) { + ET_LOG_AND_RETURN_IF_FALSE(output_size.value().size() == 2); + ET_LOG_AND_RETURN_IF_FALSE(output_size.value()[0] > 0); + ET_LOG_AND_RETURN_IF_FALSE(output_size.value()[1] > 0); + } + + return true; +} + +bool check_upsample_bilinear2d_args( + const Tensor& in, + const exec_aten::OptionalArrayRef& output_size, + ET_UNUSED const bool align_corners, + const exec_aten::OptionalArrayRef& scale_factors, + Tensor& out) { + return check_upsample_2d_common_args(in, output_size, scale_factors, out); +} + +Error resize_upsample_2d( + const Tensor& in, + const exec_aten::OptionalArrayRef& output_size, + const exec_aten::OptionalArrayRef& scale_factors, + double& scale_h_out, + double& scale_w_out, + Tensor& out) { + // Either output_size or scale_factors are provided, not both. This + // is checked in check_..._args. + // Scales are transformed according to align_corners. + std::array target_size; + + const auto dim = in.dim(); + std::copy(in.sizes().cbegin(), in.sizes().cend(), target_size.begin()); + + if (scale_factors.has_value()) { + scale_h_out = scale_factors.value()[0]; + scale_w_out = scale_factors.value()[1]; + + target_size[dim - 2] = + static_cast(in.sizes()[dim - 2] * scale_h_out); + target_size[dim - 1] = + static_cast(in.sizes()[dim - 1] * scale_w_out); + } else if (output_size.has_value()) { + scale_h_out = + static_cast(output_size.value()[0]) / in.sizes()[dim - 2]; + scale_w_out = + static_cast(output_size.value()[1]) / in.sizes()[dim - 1]; + + target_size[dim - 2] = output_size.value()[0]; + target_size[dim - 1] = output_size.value()[1]; + } else { + ET_LOG(Error, "Invalid output_size or scale_factors"); + return Error::InvalidArgument; + } + + ET_CHECK_OR_RETURN_ERROR( + target_size[dim - 2] > 0 && target_size[dim - 1] > 0, + InvalidArgument, + "Upsampled output size must be non-empty, but was %ld x %ld.", + static_cast(target_size[dim - 2]), + static_cast(target_size[dim - 1])); + + return resize_tensor(out, {target_size.data(), static_cast(dim)}); +} + +} // namespace executor +} // namespace torch diff --git a/kernels/portable/cpu/util/upsample_util.h b/kernels/portable/cpu/util/upsample_util.h new file mode 100644 index 0000000000..86ae8e77ec --- /dev/null +++ b/kernels/portable/cpu/util/upsample_util.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +namespace torch { +namespace executor { + +bool check_upsample_2d_common_args( + const Tensor& in, + const exec_aten::OptionalArrayRef& output_size, + const exec_aten::OptionalArrayRef& scale_factors, + Tensor& out); + +bool check_upsample_bilinear2d_args( + const Tensor& in, + const exec_aten::OptionalArrayRef& output_size, + const bool align_corners, + const exec_aten::OptionalArrayRef& scale_factors, + Tensor& out); + +Error resize_upsample_2d( + const Tensor& in, + const exec_aten::OptionalArrayRef& output_size, + const exec_aten::OptionalArrayRef& scale_factors, + double& scale_h_out, + double& scale_w_out, + Tensor& out); + +// Ported from aten/src/ATen/native/UpSample.h +template +inline scalar_t compute_scales_value( + const exec_aten::optional& scale, + int64_t input_size, + int64_t output_size) { + return scale.has_value() ? static_cast(1.0 / scale.value()) + : (static_cast(input_size) / output_size); +} + +// Ported from aten/src/ATen/native/UpSample.h +template +inline scalar_t area_pixel_compute_scale( + int64_t input_size, + int64_t output_size, + bool align_corners, + const exec_aten::optional& scale) { + // see Note [area_pixel_compute_scale] + if (align_corners) { + if (output_size > 1) { + return static_cast(input_size - 1) / (output_size - 1); + } else { + return static_cast(0); + } + } else { + return compute_scales_value(scale, input_size, output_size); + } +} + +// Ported from aten/src/ATen/native/UpSample.h +template +inline scalar_t area_pixel_compute_source_index( + scalar_t scale, + int64_t dst_index, + bool align_corners, + bool cubic) { + if (align_corners) { + return scale * dst_index; + } else { + scalar_t src_idx = scale * (dst_index + static_cast(0.5)) - + static_cast(0.5); + return (!cubic && src_idx < static_cast(0)) ? scalar_t(0) + : src_idx; + } +} + +// Ported from aten/src/ATen/native/UpSample.h +// when `real_input_index` becomes larger than the range the floating point +// type can accurately represent, the type casting to `int64_t` might exceed +// `input_size`, causing overflow. So we guard it with `std::min` below. +template +inline void guard_index_and_lambda( + const opmath_t& real_input_index, + const int64_t& input_size, + int64_t& input_index, + scalar_t& lambda) { + input_index = + std::min(static_cast(floorf(real_input_index)), input_size - 1); + lambda = std::min( + std::max(real_input_index - input_index, static_cast(0)), + static_cast(1)); +} + +// Ported from aten/src/ATen/native/UpSample.h +template +inline void compute_source_index_and_lambda( + int64_t& input_index0, + int64_t& input_index1, + scalar_t& lambda0, + scalar_t& lambda1, + opmath_t ratio, + int64_t output_index, + int64_t input_size, + int64_t output_size, + bool align_corners) { + if (output_size == input_size) { + // scale_factor = 1, simply copy + input_index0 = output_index; + input_index1 = output_index; + lambda0 = static_cast(1); + lambda1 = static_cast(0); + } else { + const auto real_input_index = area_pixel_compute_source_index( + ratio, output_index, align_corners, /*cubic=*/false); + guard_index_and_lambda(real_input_index, input_size, input_index0, lambda1); + int64_t offset = (input_index0 < input_size - 1) ? 1 : 0; + input_index1 = input_index0 + offset; + lambda0 = static_cast(1.) 
diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml
index 0da9917214..ed203f9487 100644
--- a/kernels/portable/functions.yaml
+++ b/kernels/portable/functions.yaml
@@ -917,6 +917,11 @@
     - arg_meta: null
       kernel_name: torch::executor::unsqueeze_copy_out
 
+- op: upsample_bilinear2d.vec_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::upsample_bilinear2d_vec_out
+
 - op: var.correction_out
   kernels:
     - arg_meta: null
diff --git a/kernels/portable/test/TARGETS b/kernels/portable/test/TARGETS
index 2341af9282..6255c37cbc 100644
--- a/kernels/portable/test/TARGETS
+++ b/kernels/portable/test/TARGETS
@@ -2,7 +2,41 @@
 # targets.bzl. This file can contain fbcode-only targets.
 
 load(":targets.bzl", "define_common_targets")
+load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 
 oncall("executorch")
 
 define_common_targets()
+
+runtime.cxx_library(
+    name = "aot_ops_test_lib",
+    srcs = [
+        "register_ops_aot_for_test.cpp",
+    ],
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//executorch/extension/aten_util:aten_bridge",
+        "//executorch/kernels/portable/cpu:op_upsample_bilinear2d",
+        "//executorch/runtime/core/exec_aten:lib",
+    ],
+    external_deps = [
+        "libtorch",
+    ],
+)
+
+python_unittest(
+    name = "op_upsample_bilinear2d_test",
+    srcs = [
+        "op_upsample_bilinear2d_test.py",
+    ],
+    preload_deps = [
+        ":aot_ops_test_lib",
+    ],
+    deps = [
+        "//caffe2:torch",
+    ],
+)
diff --git a/kernels/portable/test/op_upsample_bilinear2d_test.py b/kernels/portable/test/op_upsample_bilinear2d_test.py
new file mode 100644
index 0000000000..67609202d8
--- /dev/null
+++ b/kernels/portable/test/op_upsample_bilinear2d_test.py
@@ -0,0 +1,87 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import itertools
+import unittest
+
+from typing import Optional, Sequence
+
+import torch
+
+
+class UpsampleBilinear2dTest(unittest.TestCase):
+    def run_upsample_test(
+        self,
+        inp: torch.Tensor,
+        output_size: Optional[Sequence[int]] = None,
+        align_corners: bool = False,
+        scale_factors: Optional[Sequence[float]] = None,
+        atol: float = 2e-7,
+    ) -> None:
+        aten_result = torch.nn.functional.interpolate(
+            inp,
+            size=output_size,
+            mode="bilinear",
+            scale_factor=scale_factors,
+            align_corners=align_corners,
+        )
+        et_result = torch.zeros_like(aten_result)
+        et_result = torch.ops.et_test.upsample_bilinear2d(
+            inp,
+            output_size=output_size,
+            scale_factors=scale_factors,
+            align_corners=align_corners,
+            out=et_result,
+        )
+        self.assertTrue(
+            torch.allclose(et_result, aten_result, atol=atol),
+            msg=f"ET: {et_result} \n ATen: {aten_result} \n Error: {et_result.to(torch.float) - aten_result.to(torch.float)}",
+        )
+
+    def test_upsample_bilinear2d_aten_parity_f32(self):
+        N = [1, 2]
+        C = [1, 3]
+        H = [1, 3, 50, 1001]
+        W = [1, 2, 62, 1237]
+        OUT_H = [5, 21]
+        OUT_W = [7, 31]
+        ALIGN_CORNERS = [True, False]
+
+        for n, c, h, w, out_h, out_w, align_corners in itertools.product(
+            N, C, H, W, OUT_H, OUT_W, ALIGN_CORNERS
+        ):
+            input = torch.randn(n, c, h, w)
+            self.run_upsample_test(
+                input, output_size=(out_h, out_w), align_corners=align_corners
+            )
+            self.run_upsample_test(
+                input,
+                scale_factors=(out_h / h, out_w / w),
+                align_corners=align_corners,
+            )
+
+    def test_upsample_bilinear2d_aten_parity_u8(self):
+        N = [1, 2]
+        C = [1, 3]
+        H = [1, 3, 50, 1001]
+        W = [1, 2, 62, 1237]
+        OUT_H = [5, 21]
+        OUT_W = [7, 31]
+        ALIGN_CORNERS = [True, False]
+
+        for n, c, h, w, out_h, out_w, align_corners in itertools.product(
+            N, C, H, W, OUT_H, OUT_W, ALIGN_CORNERS
+        ):
+            input = torch.randint(0, 255, (n, c, h, w), dtype=torch.uint8)
+            self.run_upsample_test(
+                input, output_size=(out_h, out_w), align_corners=align_corners, atol=2
+            )
+            self.run_upsample_test(
+                input,
+                scale_factors=(out_h / h, out_w / w),
+                align_corners=align_corners,
+                atol=2,
+            )
diff --git a/kernels/portable/test/register_ops_aot_for_test.cpp b/kernels/portable/test/register_ops_aot_for_test.cpp
new file mode 100644
index 0000000000..c8b63e736a
--- /dev/null
+++ b/kernels/portable/test/register_ops_aot_for_test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/extension/aten_util/make_aten_functor_from_et_functor.h>
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/kernel/kernel_runtime_context.h>
+
+#include <torch/library.h>
+
+namespace torch {
+namespace executor {
+
+namespace native {
+
+// Signatures are auto-generated, so disable pass-by-value lint.
+// NOLINTBEGIN(facebook-hte-ConstantArgumentPassByValue,
+// facebook-hte-ParameterMightThrowOnCopy)
+Tensor& upsample_bilinear2d_vec_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& in,
+    const exec_aten::OptionalArrayRef<int64_t> output_size,
+    bool align_corners,
+    const exec_aten::OptionalArrayRef<double> scale_factors,
+    Tensor& out);
+
+Tensor& upsample_bilinear2d_vec_out_no_context(
+    const Tensor& in,
+    const exec_aten::OptionalArrayRef<int64_t> output_size,
+    bool align_corners,
+    const exec_aten::OptionalArrayRef<double> scale_factors,
+    Tensor& out) {
+  KernelRuntimeContext ctx;
+  auto& ret = upsample_bilinear2d_vec_out(
+      ctx, in, output_size, align_corners, scale_factors, out);
+
+  if (ctx.failure_state() != Error::Ok) {
+    throw std::runtime_error(
+        std::string("Kernel failed with error: ") +
+        std::to_string((int)ctx.failure_state()));
+  }
+
+  return ret;
+}
+// NOLINTEND(facebook-hte-ConstantArgumentPassByValue,
+// facebook-hte-ParameterMightThrowOnCopy)
+
+TORCH_LIBRARY(et_test, m) {
+  m.def(
+      "upsample_bilinear2d.vec_out(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors, *, Tensor(a!) out) -> Tensor(a!)",
+      WRAP_TO_ATEN(upsample_bilinear2d_vec_out_no_context, 4));
+}
+
+} // namespace native
+} // namespace executor
+} // namespace torch
diff --git a/kernels/test/op_upsample_bilinear2d_test.cpp b/kernels/test/op_upsample_bilinear2d_test.cpp
new file mode 100644
index 0000000000..c7b5332275
--- /dev/null
+++ b/kernels/test/op_upsample_bilinear2d_test.cpp
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/kernels/test/supported_features.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+
+#include <gtest/gtest.h>
+
+using exec_aten::optional;
+using exec_aten::ScalarType;
+using exec_aten::Tensor;
+using torch::executor::testing::SupportedFeatures;
+using torch::executor::testing::TensorFactory;
+
+#ifdef USE_ATEN_LIB
+template <class T>
+using OptionalArrayRef = std::optional<c10::ArrayRef<T>>;
+#else
+using exec_aten::OptionalArrayRef;
+#endif
+
+class OpUpsampleBilinear2dTest : public OperatorTest {
+ protected:
+  Tensor& op_upsample_bilinear2d_vec_out(
+      const Tensor& in,
+      const OptionalArrayRef<int64_t>& output_size,
+      bool align_corners,
+      const OptionalArrayRef<double>& scale_factors,
+      Tensor& out) {
+    return torch::executor::aten::upsample_bilinear2d_outf(
+        context_, in, output_size, align_corners, scale_factors, out);
+  }
+
+  template <class CTYPE, ScalarType DTYPE>
+  void test_upsample_bilinear2d_dtype() {
+    TensorFactory<DTYPE> tf;
+
+    const auto input = tf.make({1, 1, 1, 2}, {1, 4});
+    std::array<int64_t, 2> output_size = {1, 4};
+    auto out = tf.zeros({1, 1, 1, 4});
+
+    op_upsample_bilinear2d_vec_out(
+        input,
+        OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+        true,
+        {},
+        out);
+
+    const auto expected = tf.make({1, 1, 1, 4}, {1, 2, 3, 4});
+
+    EXPECT_TENSOR_CLOSE(out, expected);
+  }
+};
+
+TEST_F(OpUpsampleBilinear2dTest, Simple1x2To1x4) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make({1, 1, 1, 2}, {1.0, 4.0});
+  std::array<int64_t, 2> output_size = {1, 4};
+  auto out = tf.zeros({1, 1, 1, 4});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      false,
+      {},
+      out);
+
+  const auto expected = tf.make({1, 1, 1, 4}, {1.0, 1.75, 3.25, 4.0});
+
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, Simple1x2To1x4AlignCorners) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make({1, 1, 1, 2}, {1.0, 4.0});
+  std::array<int64_t, 2> output_size = {1, 4};
+  auto out = tf.zeros({1, 1, 1, 4});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      true,
+      {},
+      out);
+
+  const auto expected = tf.make({1, 1, 1, 4}, {1.0, 2.0, 3.0, 4.0});
+
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, Simple2x1To4x1) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make({1, 1, 2, 1}, {1.0, 4.0});
+  std::array<int64_t, 2> output_size = {4, 1};
+  auto out = tf.zeros({1, 1, 4, 1});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      false,
+      {},
+      out);
+
+  const auto expected = tf.make({1, 1, 4, 1}, {1.0, 1.75, 3.25, 4.0});
+
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, Simple2x1To4x1AlignCorners) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make({1, 1, 2, 1}, {1.0, 4.0});
+  std::array<int64_t, 2> output_size = {4, 1};
+  auto out = tf.zeros({1, 1, 4, 1});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      true,
+      {},
+      out);
+
+  const auto expected = tf.make({1, 1, 4, 1}, {1.0, 2.0, 3.0, 4.0});
+
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, SmokeTest) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make(
+      {1, 1, 2, 3},
+      {
+          1.0,
+          2.0,
+          3.0,
+          4.0,
+          5.0,
+          6.0,
+      });
+  std::array<int64_t, 2> output_size = {3, 4};
+  auto out = tf.zeros({1, 1, 3, 4});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      false,
+      {},
+      out);
+
+  const auto expected = tf.make(
+      {1, 1, 3, 4},
+      {1.0000,
+       1.6250,
+       2.3750,
+       3.0000,
+       2.5000,
+       3.1250,
+       3.8750,
+       4.5000,
+       4.0000,
+       4.6250,
+       5.3750,
+       6.0000});
+
+  EXPECT_TENSOR_CLOSE(out, expected);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, SmokeTestAlignCorners) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make(
+      {1, 1, 2, 3},
+      {
+          1.0,
+          2.0,
+          3.0,
+          4.0,
+          5.0,
+          6.0,
+      });
+  std::array<int64_t, 2> output_size = {3, 4};
+  auto out = tf.zeros({1, 1, 3, 4});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      true,
+      {},
+      out);
+
+  const auto expected = tf.make(
+      {1, 1, 3, 4},
+      {1.0000,
+       1.6667,
+       2.3333,
+       3.0000,
+       2.5000,
+       3.1667,
+       3.8333,
+       4.5000,
+       4.0000,
+       4.6667,
+       5.3333,
+       6.0000});
+
+  EXPECT_TENSOR_CLOSE_WITH_TOL(out, expected, 0, 0.0001);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, SmokeTestScales) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make(
+      {1, 1, 2, 3},
+      {
+          1.0,
+          2.0,
+          3.0,
+          4.0,
+          5.0,
+          6.0,
+      });
+  auto out = tf.zeros({1, 1, 3, 4});
+
+  const std::array<double, 2> scale_factors = {3.0 / 2, 4.0 / 3};
+  op_upsample_bilinear2d_vec_out(
+      input,
+      {},
+      false,
+      OptionalArrayRef<double>({scale_factors.data(), scale_factors.size()}),
+      out);
+
+  const auto expected = tf.make(
+      {1, 1, 3, 4},
+      {1.0000,
+       1.6250,
+       2.3750,
+       3.0000,
+       2.5000,
+       3.1250,
+       3.8750,
+       4.5000,
+       4.0000,
+       4.6250,
+       5.3750,
+       6.0000});
+
+  EXPECT_TENSOR_CLOSE(out, expected);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, SmokeTestAlignCornersScales) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make(
+      {1, 1, 2, 3},
+      {
+          1.0,
+          2.0,
+          3.0,
+          4.0,
+          5.0,
+          6.0,
+      });
+  auto out = tf.zeros({1, 1, 3, 4});
+
+  const std::array<double, 2> scale_factors = {3.0 / 2, 4.0 / 3};
+  op_upsample_bilinear2d_vec_out(
+      input,
+      {},
+      true,
+      OptionalArrayRef<double>({scale_factors.data(), scale_factors.size()}),
+      out);
+
+  const auto expected = tf.make(
+      {1, 1, 3, 4},
+      {1.0000,
+       1.6667,
+       2.3333,
+       3.0000,
+       2.5000,
+       3.1667,
+       3.8333,
+       4.5000,
+       4.0000,
+       4.6667,
+       5.3333,
+       6.0000});
+
+  EXPECT_TENSOR_CLOSE_WITH_TOL(out, expected, 0, 0.0001);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, DType) {
+#define TEST_ENTRY(ctype, dtype) \
+  test_upsample_bilinear2d_dtype<ctype, ScalarType::dtype>();
+  ET_FORALL_REAL_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
+}
+
+TEST_F(OpUpsampleBilinear2dTest, MismatchedOutputSizeDies) {
+  if (SupportedFeatures::get()->output_resize) {
+    GTEST_SKIP()
+        << "The current kernel supports implicitly resizing output tensor";
+  }
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.ones({1, 1, 1, 2});
+  std::array<int64_t, 2> output_size = {1, 4};
+  auto out = tf.zeros({1, 1, 1, 5});
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_upsample_bilinear2d_vec_out(
+          input,
+          OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+          false,
+          {},
+          out));
+}
+
+TEST_F(OpUpsampleBilinear2dTest, InvalidInputRankDies) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.ones({1, 1, 2});
+  std::array<int64_t, 2> output_size = {1, 4};
+  auto out = tf.zeros({1, 1, 1, 4});
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_upsample_bilinear2d_vec_out(
+          input,
+          OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+          false,
+          {},
+          out));
+}
+
+TEST_F(OpUpsampleBilinear2dTest, InvalidOutputRankDies) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.ones({1, 1, 2});
+  std::array<int64_t, 2> output_size = {1, 4};
+  auto out = tf.zeros({1, 1, 4});
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_upsample_bilinear2d_vec_out(
+          input,
+          OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+          false,
+          {},
+          out));
+}
+
+TEST_F(OpUpsampleBilinear2dTest, MissingOutputSizeOrScaleDies) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.ones({1, 1, 2});
+  auto out = tf.zeros({1, 1, 4});
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_, op_upsample_bilinear2d_vec_out(input, {}, false, {}, out));
+}
+
+TEST_F(OpUpsampleBilinear2dTest, BothOutputSizeAndScaleDies) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.ones({1, 1, 2});
+  const std::array<int64_t, 2> output_size = {1, 4};
+  const std::array<double, 2> scale_factors = {2, 1};
+  auto out = tf.zeros({1, 1, 4});
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_upsample_bilinear2d_vec_out(
+          input,
+          OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+          false,
+          OptionalArrayRef<double>(
+              {scale_factors.data(), scale_factors.size()}),
+          out));
+}
+
+TEST_F(OpUpsampleBilinear2dTest, MismatchedDTypeDies) {
+  TensorFactory<ScalarType::Float> tf;
+  TensorFactory<ScalarType::Double> tf2;
+
+  const auto input = tf.ones({1, 1, 1, 2});
+  std::array<int64_t, 2> output_size = {1, 4};
+  auto out = tf2.zeros({1, 1, 1, 4});
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_upsample_bilinear2d_vec_out(
+          input,
+          OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+          false,
+          {},
+          out));
+}
+
+TEST_F(OpUpsampleBilinear2dTest, ComputedOutputSizeMatchesExpected) {
+  // Computed output sizes (from input size * scales) must match PyTorch
+  // eager mode: multiplied as double and cast (truncated) to an integral
+  // type.
+  // See
+  // https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/UpSample.cpp
+  TensorFactory<ScalarType::Float> tf;
+
+  // Test case format: { in_h, in_w, scale_h, scale_w, out_h, out_w }
+  std::vector<std::tuple<int32_t, int32_t, double, double, int32_t, int32_t>>
+      test_cases = {
+          {10, 10, 9.99999, 9.55, 99, 95},
+          {10, 10, 9.99999999, 0.1, 99, 1},
+      };
+
+  for (const auto& test_case : test_cases) {
+    const auto [in_h, in_w, scale_h, scale_w, out_h, out_w] = test_case;
+
+    const auto input = tf.ones({1, 1, in_h, in_w});
+    auto out = tf.zeros({1, 1, out_h, out_w});
+    std::array<double, 2> scale_factors = {scale_h, scale_w};
+
+    op_upsample_bilinear2d_vec_out(
+        input,
+        {},
+        false,
+        OptionalArrayRef<double>({scale_factors.data(), scale_factors.size()}),
+        out);
+
+    const auto expected = tf.ones({1, 1, out_h, out_w});
+
+    EXPECT_TENSOR_CLOSE(out, expected);
+  }
+}
+
+TEST_F(OpUpsampleBilinear2dTest, ZeroComputedOutputSizeDies) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.ones({1, 1, 1, 2});
+  auto out = tf.zeros({1, 1, 1, 4});
+  std::array<double, 2> scale_factors = {1, 0.25};
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_upsample_bilinear2d_vec_out(
+          input,
+          {},
+          false,
+          OptionalArrayRef<double>(
+              {scale_factors.data(), scale_factors.size()}),
+          out));
+}
+
+TEST_F(OpUpsampleBilinear2dTest, NumericsCheck) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.ones({3, 7, 47, 99});
+  auto out = tf.zeros({3, 7, 291, 512});
+  std::array<int64_t, 2> output_size = {291, 512};
+
+  auto input_ptr = static_cast<float*>(input.mutable_data_ptr());
+  for (auto i = 0ul; i < input.numel(); i++) {
+    input_ptr[i] = static_cast<float>(i);
+  }
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      false,
+      {},
+      out);
+
+  // Indices and expected values to evaluate.
+  std::vector<std::tuple<int, int, int, int, double>> test_values = {
+      {0, 2, 60, 200, 10262.14453125},
+      {1, 6, 5, 503, 60624.30078125},
+      {2, 0, 111, 300, 66932.953125},
+  };
+
+  const auto output_data = static_cast<const float*>(out.const_data_ptr());
+  for (const auto& test_case : test_values) {
+    const auto [n, c, h, w, expected] = test_case;
+    const auto actual = output_data
+        [n * out.strides()[0] + c * out.strides()[1] + h * out.strides()[2] +
+         w * out.strides()[3]];
+    EXPECT_FLOAT_EQ(expected, actual);
+  }
+}
+
+TEST_F(OpUpsampleBilinear2dTest, NumericsCheckAlignCorners) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.ones({3, 7, 47, 99});
+  auto out = tf.zeros({3, 7, 291, 512});
+  std::array<int64_t, 2> output_size = {291, 512};
+
+  auto input_ptr = static_cast<float*>(input.mutable_data_ptr());
+  for (auto i = 0ul; i < input.numel(); i++) {
+    input_ptr[i] = static_cast<float>(i);
+  }
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      true,
+      {},
+      out);
+
+  // Indices and expected values to evaluate.
+  std::vector<std::tuple<int, int, int, int, double>> test_values = {
+      {0, 2, 60, 200, 10286.5634765625},
+      {1, 6, 5, 503, 60663.98046875},
+      {2, 0, 111, 300, 66942.625},
+  };
+
+  const auto output_data = static_cast<const float*>(out.const_data_ptr());
+  for (const auto& test_case : test_values) {
+    const auto [n, c, h, w, expected] = test_case;
+    const auto actual = output_data
+        [n * out.strides()[0] + c * out.strides()[1] + h * out.strides()[2] +
+         w * out.strides()[3]];
+    EXPECT_FLOAT_EQ(expected, actual);
+  }
+}
+
+TEST_F(OpUpsampleBilinear2dTest, Simple5x1To4x1) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make({1, 1, 5, 1}, {1.0, 2.0, 3.0, 4.0, 5.0});
+  std::array<int64_t, 2> output_size = {4, 1};
+  auto out = tf.zeros({1, 1, 4, 1});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      false,
+      {},
+      out);
+
+  const auto expected = tf.make({1, 1, 4, 1}, {1.1250, 2.3750, 3.6250, 4.8750});
+
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, Simple5x1To4x1AlignCorners) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make({1, 1, 5, 1}, {1.0, 2.0, 3.0, 4.0, 5.0});
+  std::array<int64_t, 2> output_size = {4, 1};
+  auto out = tf.zeros({1, 1, 4, 1});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      true,
+      {},
+      out);
+
+  const auto expected = tf.make({1, 1, 4, 1}, {1.0, 2.333333, 3.666667, 5.0});
+
+  EXPECT_TENSOR_CLOSE(out, expected);
+}
diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl
index 18fa646aec..d4f5769dbe 100644
--- a/kernels/test/targets.bzl
+++ b/kernels/test/targets.bzl
@@ -346,6 +346,7 @@ def define_common_targets():
     _common_op_test("op_trunc_test", ["aten", "portable"])
     _common_op_test("op_unbind_copy_test", ["aten", "portable"])
    _common_op_test("op_unsqueeze_copy_test", ["aten", "portable"])
+    _common_op_test("op_upsample_bilinear2d_test", ["aten", "portable"])
     _common_op_test("op_var_test", ["aten", "portable"])
     _common_op_test("op_view_copy_test", ["aten", "portable"])
     _common_op_test("op_where_test", ["aten", "portable"])
diff --git a/shim/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim/xplat/executorch/kernels/portable/op_registration_util.bzl
index 8a04277c95..9f2a383402 100644
--- a/shim/xplat/executorch/kernels/portable/op_registration_util.bzl
+++ b/shim/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -1229,6 +1229,12 @@ ATEN_OPS = (
             "//executorch/kernels/portable/cpu/util:copy_ops_util",
         ],
     ),
+    op_target(
+        name = "op_upsample_bilinear2d",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:upsample_util",
+        ],
+    ),
     op_target(
         name = "op_var",
         deps = [
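A hypothetical end-to-end sketch (not part of this diff) of how a model that uses bilinear upsampling would be lowered so it can run against the portable kernels. The API names (`torch.export.export`, `executorch.exir.to_edge`, `.to_executorch()`, `.buffer`) follow the documented ExecuTorch AOT flow at the time of writing and may differ across releases:

```python
import torch
from executorch.exir import to_edge


class Upscale(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Lowers to upsample_bilinear2d.vec, served by the new portable kernel.
        return torch.nn.functional.interpolate(
            x, scale_factor=2.0, mode="bilinear", align_corners=False
        )


exported = torch.export.export(Upscale(), (torch.randn(1, 3, 8, 8),))
program = to_edge(exported).to_executorch()
with open("upscale.pte", "wb") as f:
    f.write(program.buffer)
```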