diff --git a/backends/aoti/slim/core/SlimTensor.h b/backends/aoti/slim/core/SlimTensor.h
index 0061b0e08b9..b7eae8dd592 100644
--- a/backends/aoti/slim/core/SlimTensor.h
+++ b/backends/aoti/slim/core/SlimTensor.h
@@ -312,6 +312,33 @@ class SlimTensor {
     set_sizes_and_strides(sizes, makeArrayRef(contig_strides));
   }
 
+  /**
+   * Returns a copy of this tensor.
+   *
+   * @return A new SlimTensor with the same content.
+   */
+  SlimTensor clone() const {
+    return _clone_impl(
+        this->sizes(), this->strides(), this->dtype(), this->device());
+  }
+
+  /**
+   * Returns a contiguous copy of this tensor.
+   * If the tensor is already contiguous, returns a copy with independent
+   * storage.
+   *
+   * @return A new contiguous SlimTensor.
+   */
+  SlimTensor clone_contiguous() const {
+    std::vector<int64_t> contig_strides =
+        compute_contiguous_strides(this->sizes());
+    return _clone_impl(
+        this->sizes(),
+        makeArrayRef(contig_strides),
+        this->dtype(),
+        this->device());
+  }
+
   // =========================================================================
   // View Operations
   // =========================================================================
@@ -364,6 +391,39 @@ class SlimTensor {
         makeArrayRef(sizes), makeArrayRef(strides), storage_offset);
   }
 
+  /**
+   * Returns a new tensor with dimensions permuted according to dims.
+   * The returned tensor shares the same underlying storage.
+   *
+   * @param dims The permutation of dimensions.
+   * @return A new SlimTensor with permuted dimensions.
+   */
+  inline SlimTensor permute(IntArrayRef dims) const;
+
+  /**
+   * Overload for initializer lists.
+   */
+  inline SlimTensor permute(std::initializer_list<int64_t> dims) const {
+    return permute(makeArrayRef(dims));
+  }
+
+  /**
+   * Returns a tensor with the same data and number of elements as this tensor,
+   * but with the specified shape. If possible, returns a view; otherwise
+   * creates a contiguous copy.
+   *
+   * @param shape The target shape (may contain one -1 for inference).
+   * @return A new SlimTensor with the specified shape.
+   */
+  inline SlimTensor reshape(IntArrayRef shape) const;
+
+  /**
+   * Overload for initializer lists.
+   */
+  inline SlimTensor reshape(std::initializer_list<int64_t> shape) const {
+    return reshape(makeArrayRef(shape));
+  }
+
   // =========================================================================
   // Copy Operation
   // =========================================================================
@@ -445,6 +505,18 @@ class SlimTensor {
   }
 
  private:
+  SlimTensor _clone_impl(
+      c10::IntArrayRef sizes,
+      c10::IntArrayRef strides,
+      c10::ScalarType dtype,
+      const c10::Device& device) const {
+    Storage storage = new_storage(sizes, strides, dtype, device);
+    SlimTensor result =
+        SlimTensor(std::move(storage), sizes, strides, dtype, 0);
+    result.copy_(*this);
+    return result;
+  }
+
   void refresh_numel() {
     numel_ = compute_numel(sizes_and_strides_.sizes_arrayref());
   }
diff --git a/backends/aoti/slim/core/SlimTensorView-incl.h b/backends/aoti/slim/core/SlimTensorView-incl.h
index f0ed8bc087c..a437f187299 100644
--- a/backends/aoti/slim/core/SlimTensorView-incl.h
+++ b/backends/aoti/slim/core/SlimTensorView-incl.h
@@ -49,4 +49,62 @@ inline SlimTensor& SlimTensor::as_strided_(
   return *this;
 }
 
+inline SlimTensor SlimTensor::permute(IntArrayRef dims) const {
+  const size_t ndim = this->dim();
+  ET_CHECK_MSG(
+      ndim == dims.size(),
+      "permute: dims length (%zu) must equal tensor.dim() (%zu)",
+      dims.size(),
+      ndim);
+
+  IntArrayRef old_sizes = this->sizes();
+  IntArrayRef old_strides = this->strides();
+  std::vector<int64_t> new_sizes(ndim);
+  std::vector<int64_t> new_strides(ndim);
+  std::vector<bool> seen_dims(ndim, false);
+
+  for (size_t i = 0; i < ndim; i++) {
+    int64_t d = c10::maybe_wrap_dim(dims[i], ndim);
+    ET_CHECK_MSG(!seen_dims[d], "permute: duplicate dims are not allowed");
+    seen_dims[d] = true;
+    new_sizes[i] = old_sizes[d];
+    new_strides[i] = old_strides[d];
+  }
+
+  SlimTensor result = *this;
+  result.as_strided_(
+      makeArrayRef(new_sizes),
+      makeArrayRef(new_strides),
+      this->storage_offset());
+  return result;
+}
+
+inline SlimTensor SlimTensor::reshape(IntArrayRef proposed_shape) const {
+  std::vector<int64_t> final_shape_vec =
+      infer_size(proposed_shape, static_cast<int64_t>(this->numel()));
+
+  // compute_stride returns the proper strides to use if this
+  // reshape can be just a view.
+  std::optional<std::vector<int64_t>> new_strides_opt = compute_stride(
+      this->sizes(), this->strides(), makeArrayRef(final_shape_vec));
+
+  // Create a view if possible
+  if (new_strides_opt.has_value()) {
+    SlimTensor result = *this;
+    result.as_strided_(
+        makeArrayRef(final_shape_vec),
+        makeArrayRef(new_strides_opt.value()),
+        this->storage_offset());
+    return result;
+  }
+
+  // If a view is not possible, create a contiguous clone and reshape that
+  SlimTensor contiguous_clone = this->clone_contiguous();
+  // After cloning, the tensor is already contiguous. We just need to update
+  // its metadata to reflect the new shape. This is effectively a view of
+  // the new contiguous clone.
+  contiguous_clone.set_sizes_contiguous(makeArrayRef(final_shape_vec));
+  return contiguous_clone;
+}
+
 } // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/core/test/targets.bzl b/backends/aoti/slim/core/test/targets.bzl
index e2bd116ffc9..72f101a6200 100644
--- a/backends/aoti/slim/core/test/targets.bzl
+++ b/backends/aoti/slim/core/test/targets.bzl
@@ -7,16 +7,6 @@ def get_backend_mode():
 
 def define_common_targets():
     """Define test targets for SlimTensor core module."""
-    runtime.cxx_test(
-        name = "test_slimtensor_dtypes",
-        srcs = [
-            "test_slimtensor_dtypes.cpp",
-        ],
-        deps = [
-            "//executorch/backends/aoti/slim/factory:empty",
-        ],
-    )
-
     # Backend mode specific tests
     for backend_mode in get_backend_mode():
         backend_suffix = "_" + backend_mode if backend_mode == "cuda" else ""
@@ -77,3 +67,16 @@ def define_common_targets():
            ],
            **backend_kwargs
        )
+
+
+        runtime.cxx_test(
+            name = "test_permute_reshape" + backend_suffix,
+            srcs = [
+                "test_permute_reshape.cpp",
+            ],
+            deps = [
+                "//executorch/backends/aoti/slim/core:slimtensor",
+                "//executorch/backends/aoti/slim/factory:empty",
+            ],
+            **backend_kwargs
+        )
diff --git a/backends/aoti/slim/core/test/test_permute_reshape.cpp b/backends/aoti/slim/core/test/test_permute_reshape.cpp
new file mode 100644
index 00000000000..688245d8be7
--- /dev/null
+++ b/backends/aoti/slim/core/test/test_permute_reshape.cpp
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <gtest/gtest.h>
+
+#include
+#include
+#include
+
+#ifdef CUDA_AVAILABLE
+#include
+#endif
+
+namespace executorch::backends::aoti::slim {
+
+// =============================================================================
+// Device trait for parameterized tests
+// =============================================================================
+
+struct CPUDevice {
+  static c10::Device device() {
+    return CPU_DEVICE;
+  }
+  static constexpr bool is_cuda = false;
+};
+
+#ifdef CUDA_AVAILABLE
+struct CUDADevice {
+  static c10::Device device() {
+    return DEFAULT_CUDA_DEVICE;
+  }
+  static constexpr bool is_cuda = true;
+};
+#endif
+
+// =============================================================================
+// Test fixture for parameterized device tests
+// =============================================================================
+
+template <typename DeviceTrait>
+class PermuteReshapeDeviceTest : public ::testing::Test {
+ protected:
+  static c10::Device device() {
+    return DeviceTrait::device();
+  }
+
+  SlimTensor make_tensor(
+      std::initializer_list<int64_t> sizes,
+      c10::ScalarType dtype = c10::ScalarType::Float) {
+    return empty(sizes, dtype, device());
+  }
+
+  // Helper to initialize tensor data from CPU (handles both CPU and CUDA)
+  template <typename T>
+  void fill_sequential(SlimTensor& tensor, size_t count) {
+    if constexpr (DeviceTrait::is_cuda) {
+#ifdef CUDA_AVAILABLE
+      std::vector<T> cpu_data(count);
+      for (size_t i = 0; i < count; ++i) {
+        cpu_data[i] = static_cast<T>(i);
+      }
+      DeviceTraits::memcpy(
+          tensor.data_ptr(),
+          cpu_data.data(),
+          count * sizeof(T),
+          DEFAULT_CUDA_DEVICE,
+          CPU_DEVICE);
+#endif
+    } else {
+      T* data = static_cast<T*>(tensor.data_ptr());
+      for (size_t i = 0; i < count; ++i) {
+        data[i] = static_cast<T>(i);
+      }
+    }
+  }
+
+  // Helper to read a value from tensor (handles both CPU and CUDA)
+  template <typename T>
+  T read_value(void* ptr, size_t offset = 0) {
+    if constexpr (DeviceTrait::is_cuda) {
+#ifdef CUDA_AVAILABLE
+      T value;
+      DeviceTraits::memcpy(
+          &value,
+          static_cast<T*>(ptr) + offset,
+          sizeof(T),
+          CPU_DEVICE,
+          DEFAULT_CUDA_DEVICE);
+      return value;
+#else
+      return T{};
+#endif
+    } else {
+      return *(static_cast<T*>(ptr) + offset);
+    }
+  }
+
+  // Helper to write a value to tensor (handles both CPU and CUDA)
+  template <typename T>
+  void write_value(void* ptr, T value, size_t offset = 0) {
+    if constexpr (DeviceTrait::is_cuda) {
+#ifdef CUDA_AVAILABLE
+      DeviceTraits::memcpy(
+          static_cast<T*>(ptr) + offset,
+          &value,
+          sizeof(T),
+          DEFAULT_CUDA_DEVICE,
+          CPU_DEVICE);
+#endif
+    } else {
+      *(static_cast<T*>(ptr) + offset) = value;
+    }
+  }
+};
+
+// Type list for parameterized tests
+using DeviceTypes = ::testing::Types<
+    CPUDevice
+#ifdef CUDA_AVAILABLE
+    ,
+    CUDADevice
+#endif
+    >;
+
+TYPED_TEST_SUITE(PermuteReshapeDeviceTest, DeviceTypes);
+
+// =============================================================================
+// permute Basic Tests
+// =============================================================================
+
+TYPED_TEST(PermuteReshapeDeviceTest, Basic2DTranspose) {
+  SlimTensor tensor = this->make_tensor({3, 4});
+  this->template fill_sequential<float>(tensor, 12);
+
+  SlimTensor transposed = tensor.permute({1, 0});
+
+  EXPECT_EQ(transposed.size(0), 4);
+  EXPECT_EQ(transposed.size(1), 3);
+  EXPECT_EQ(transposed.stride(0), 1);
+  EXPECT_EQ(transposed.stride(1), 4);
+  EXPECT_FALSE(transposed.is_contiguous());
+  EXPECT_EQ(transposed.numel(), 12);
+
+  // Shares storage
+  EXPECT_EQ(transposed.storage().get(), tensor.storage().get());
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, 3DPermutation) {
+  SlimTensor tensor = this->make_tensor({2, 3, 4});
+  this->template fill_sequential<float>(tensor, 24);
+
+  SlimTensor permuted = tensor.permute({2, 0, 1});
+
+  EXPECT_EQ(permuted.size(0), 4);
+  EXPECT_EQ(permuted.size(1), 2);
+  EXPECT_EQ(permuted.size(2), 3);
+
+  // Original strides: [12, 4, 1]
+  // Permuted strides for {2, 0, 1}: [1, 12, 4]
+  EXPECT_EQ(permuted.stride(0), 1);
+  EXPECT_EQ(permuted.stride(1), 12);
+  EXPECT_EQ(permuted.stride(2), 4);
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, NegativeIndices) {
+  SlimTensor tensor = this->make_tensor({2, 3, 4});
+
+  // Use negative indices: -1 is the last dimension
+  SlimTensor permuted = tensor.permute({-1, -3, -2});
+
+  EXPECT_EQ(permuted.size(0), 4);
+  EXPECT_EQ(permuted.size(1), 2);
+  EXPECT_EQ(permuted.size(2), 3);
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, IdentityPermutation) {
+  SlimTensor tensor = this->make_tensor({2, 3, 4});
+
+  SlimTensor permuted = tensor.permute({0, 1, 2});
+
+  EXPECT_EQ(permuted.sizes(), tensor.sizes());
+  EXPECT_EQ(permuted.strides(), tensor.strides());
+  EXPECT_TRUE(permuted.is_contiguous());
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, SharedStorageModification) {
+  SlimTensor tensor = this->make_tensor({2, 3});
+  this->template fill_sequential<float>(tensor, 6);
+
+  SlimTensor transposed = tensor.permute({1, 0});
+
+  // Modify via transposed
+  this->template write_value<float>(transposed.data_ptr(), 100.0f, 0);
+
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(tensor.data_ptr(), 0), 100.0f);
+}
+
+// =============================================================================
+// reshape Basic Tests
+// =============================================================================
+
+TYPED_TEST(PermuteReshapeDeviceTest, ContiguousReshapeToView) {
+  SlimTensor tensor = this->make_tensor({2, 3, 4});
+  this->template fill_sequential<float>(tensor, 24);
+
+  SlimTensor reshaped = tensor.reshape({6, 4});
+
+  EXPECT_EQ(reshaped.size(0), 6);
+  EXPECT_EQ(reshaped.size(1), 4);
+  EXPECT_EQ(reshaped.numel(), 24);
+  EXPECT_TRUE(reshaped.is_contiguous());
+
+  // Should share storage (view)
+  EXPECT_EQ(reshaped.storage().get(), tensor.storage().get());
+
+  // Verify data is accessible
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(reshaped.data_ptr(), 0), 0.0f);
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(reshaped.data_ptr(), 23), 23.0f);
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, Flatten) {
+  SlimTensor tensor = this->make_tensor({2, 3, 4});
+  this->template fill_sequential<float>(tensor, 24);
+
+  SlimTensor flat = tensor.reshape({24});
+
+  EXPECT_EQ(flat.dim(), 1);
+  EXPECT_EQ(flat.size(0), 24);
+  EXPECT_TRUE(flat.is_contiguous());
+
+  // Should be a view
+  EXPECT_EQ(flat.storage().get(), tensor.storage().get());
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, InferDimension) {
+  SlimTensor tensor = this->make_tensor({2, 3, 4});
+
+  // Use -1 to infer dimension: 24 / 6 = 4
+  SlimTensor reshaped = tensor.reshape({6, -1});
+
+  EXPECT_EQ(reshaped.size(0), 6);
+  EXPECT_EQ(reshaped.size(1), 4);
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, InferFirstDimension) {
+  SlimTensor tensor = this->make_tensor({2, 3, 4});
+
+  // Infer first dimension: 24 / 8 = 3
+  SlimTensor reshaped = tensor.reshape({-1, 8});
+
+  EXPECT_EQ(reshaped.size(0), 3);
+  EXPECT_EQ(reshaped.size(1), 8);
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, NonContiguousTensorCopies) {
+  SlimTensor tensor = this->make_tensor({3, 4});
+  this->template fill_sequential<float>(tensor, 12);
+
+  // Transpose makes it non-contiguous
+  SlimTensor transposed = tensor.permute({1, 0});
+  EXPECT_FALSE(transposed.is_contiguous());
+
+  // Reshape of non-contiguous requires copy
+  SlimTensor reshaped = transposed.reshape({12});
+
+  EXPECT_EQ(reshaped.dim(), 1);
+  EXPECT_EQ(reshaped.size(0), 12);
+  EXPECT_TRUE(reshaped.is_contiguous());
+
+  // Should NOT share storage (copy made)
+  EXPECT_NE(reshaped.storage().get(), transposed.storage().get());
+
+  // Verify data was copied correctly
+  // transposed[0][0] = tensor[0][0] = 0
+  // transposed[0][1] = tensor[1][0] = 4
+  // transposed[0][2] = tensor[2][0] = 8
+  // transposed[1][0] = tensor[0][1] = 1
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(reshaped.data_ptr(), 0), 0.0f);
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(reshaped.data_ptr(), 1), 4.0f);
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(reshaped.data_ptr(), 2), 8.0f);
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(reshaped.data_ptr(), 3), 1.0f);
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, ExpandDimensions) {
+  SlimTensor tensor = this->make_tensor({12});
+  this->template fill_sequential<float>(tensor, 12);
+
+  SlimTensor reshaped = tensor.reshape({2, 2, 3});
+
+  EXPECT_EQ(reshaped.dim(), 3);
+  EXPECT_EQ(reshaped.size(0), 2);
+  EXPECT_EQ(reshaped.size(1), 2);
+  EXPECT_EQ(reshaped.size(2), 3);
+
+  // Should be a view
+  EXPECT_EQ(reshaped.storage().get(), tensor.storage().get());
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, SharedStorageModificationView) {
+  SlimTensor tensor = this->make_tensor({2, 6});
+  this->template fill_sequential<float>(tensor, 12);
+
+  SlimTensor reshaped = tensor.reshape({3, 4});
+
+  // Modify via reshaped
+  this->template write_value<float>(reshaped.data_ptr(), 100.0f, 0);
+
+  // Should be visible in original
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(tensor.data_ptr(), 0), 100.0f);
+}
+
+// =============================================================================
+// clone_contiguous Tests
+// =============================================================================
+
+TYPED_TEST(PermuteReshapeDeviceTest, BasicClone) {
+  SlimTensor tensor = this->make_tensor({2, 3});
+  this->template fill_sequential<float>(tensor, 6);
+
+  SlimTensor cloned = tensor.clone_contiguous();
+
+  EXPECT_EQ(cloned.sizes(), tensor.sizes());
+  EXPECT_TRUE(cloned.is_contiguous());
+  EXPECT_NE(cloned.storage().get(), tensor.storage().get());
+
+  // Data should be copied
+  for (size_t i = 0; i < 6; ++i) {
+    EXPECT_FLOAT_EQ(
+        this->template read_value<float>(cloned.data_ptr(), i),
+        static_cast<float>(i));
+  }
+
+  // Modification should be independent
+  this->template write_value<float>(cloned.data_ptr(), 100.0f, 0);
+  EXPECT_FLOAT_EQ(this->template read_value<float>(tensor.data_ptr(), 0), 0.0f);
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, NonContiguousToContiguous) {
+  SlimTensor tensor = this->make_tensor({3, 4});
+  this->template fill_sequential<float>(tensor, 12);
+
+  // Transpose makes it non-contiguous
+  SlimTensor transposed = tensor.permute({1, 0});
+  EXPECT_FALSE(transposed.is_contiguous());
+
+  SlimTensor cloned = transposed.clone_contiguous();
+
+  EXPECT_EQ(cloned.size(0), 4);
+  EXPECT_EQ(cloned.size(1), 3);
+  EXPECT_TRUE(cloned.is_contiguous());
+  EXPECT_NE(cloned.storage().get(), transposed.storage().get());
+
+  // Verify data was correctly reordered
+  // cloned[0][0] = transposed[0][0] = tensor[0][0] = 0
+  // cloned[0][1] = transposed[0][1] = tensor[1][0] = 4
+  // cloned[0][2] = transposed[0][2] = tensor[2][0] = 8
+  // cloned[1][0] = transposed[1][0] = tensor[0][1] = 1
+  EXPECT_FLOAT_EQ(this->template read_value<float>(cloned.data_ptr(), 0), 0.0f);
+  EXPECT_FLOAT_EQ(this->template read_value<float>(cloned.data_ptr(), 1), 4.0f);
+  EXPECT_FLOAT_EQ(this->template read_value<float>(cloned.data_ptr(), 2), 8.0f);
+  EXPECT_FLOAT_EQ(this->template read_value<float>(cloned.data_ptr(), 3), 1.0f);
+}
+
+// =============================================================================
+// Combined Operations Tests
+// =============================================================================
+
+TYPED_TEST(PermuteReshapeDeviceTest, PermuteAndReshape) {
+  SlimTensor tensor = this->make_tensor({2, 3, 4});
+  this->template fill_sequential<float>(tensor, 24);
+
+  // Permute to 3x2x4, then reshape to 6x4
+  SlimTensor permuted = tensor.permute({1, 0, 2});
+  SlimTensor reshaped = permuted.reshape({6, 4});
+
+  EXPECT_EQ(reshaped.size(0), 6);
+  EXPECT_EQ(reshaped.size(1), 4);
+  EXPECT_EQ(reshaped.numel(), 24);
+}
+
+TYPED_TEST(PermuteReshapeDeviceTest, ReshapeAndPermute) {
+  SlimTensor tensor = this->make_tensor({24});
+  this->template fill_sequential<float>(tensor, 24);
+
+  // Reshape to 2x3x4, then permute to 4x3x2
+  SlimTensor reshaped = tensor.reshape({2, 3, 4});
+  SlimTensor permuted = reshaped.permute({2, 1, 0});
+
+  EXPECT_EQ(permuted.size(0), 4);
+  EXPECT_EQ(permuted.size(1), 3);
+  EXPECT_EQ(permuted.size(2), 2);
+}
+
+} // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/util/SizeUtil.h b/backends/aoti/slim/util/SizeUtil.h
index faf6e9c914f..aaa41329b14 100644
--- a/backends/aoti/slim/util/SizeUtil.h
+++ b/backends/aoti/slim/util/SizeUtil.h
@@ -9,6 +9,7 @@
 
 #pragma once
 
 #include
+#include <optional>
 #include
 #include
@@ -87,4 +88,119 @@ inline std::vector<int64_t> compute_contiguous_strides(IntArrayRef sizes) {
   return strides;
 }
 
+/// Infers the final concrete shape by filling in at most one '-1' dimension.
+/// @param shape The proposed shape, may contain one -1 for inference.
+/// @param numel The total number of elements in the tensor.
+/// @return Vector with the final shape (no -1 entries).
+inline std::vector<int64_t> infer_size(IntArrayRef shape, int64_t numel) {
+  int64_t new_size = 1;
+  int64_t infer_dim = -1;
+  std::vector<int64_t> result_shape;
+  result_shape.reserve(shape.size());
+
+  for (size_t dim = 0; dim < shape.size(); dim++) {
+    if (shape[dim] == -1) {
+      ET_CHECK_MSG(infer_dim == -1, "only one dimension can be inferred");
+      infer_dim = static_cast<int64_t>(dim);
+      result_shape.push_back(-1); // placeholder
+    } else {
+      ET_CHECK_MSG(
+          shape[dim] >= 0,
+          "invalid shape dimension %ld",
+          static_cast<long>(shape[dim]));
+      new_size *= shape[dim];
+      result_shape.push_back(shape[dim]);
+    }
+  }
+
+  if (infer_dim != -1) {
+    ET_CHECK_MSG(
+        new_size != 0,
+        "cannot reshape tensor of 0 elements into shape with -1");
+    ET_CHECK_MSG(
+        numel % new_size == 0,
+        "shape is invalid for input size %ld",
+        static_cast<long>(numel));
+    result_shape[static_cast<size_t>(infer_dim)] = numel / new_size;
+  } else {
+    ET_CHECK_MSG(
+        numel == new_size,
+        "shape is invalid for input of size %ld",
+        static_cast<long>(numel));
+  }
+  return result_shape;
+}
+
+/// Determines if a reshape is possible as a view without copying.
+/// If so, returns the new strides; otherwise returns an empty optional.
+/// @param old_sizes Current tensor sizes.
+/// @param old_strides Current tensor strides.
+/// @param new_sizes Target tensor sizes.
+/// @return Strides for the view, or nullopt if copy is required.
+inline std::optional<std::vector<int64_t>> compute_stride(
+    IntArrayRef old_sizes,
+    IntArrayRef old_strides,
+    IntArrayRef new_sizes) {
+  if (old_sizes.empty()) {
+    return std::vector<int64_t>(new_sizes.size(), 1);
+  }
+
+  // Handle numel == 0 case
+  size_t numel = static_cast<size_t>(compute_numel(old_sizes));
+  if (numel == 0 && old_sizes == new_sizes) {
+    return toVec(old_strides);
+  }
+
+  int64_t new_sizes_len = static_cast<int64_t>(new_sizes.size());
+  std::vector<int64_t> new_strides(new_sizes_len);
+  if (numel == 0) {
+    for (int64_t view_d = new_sizes_len - 1; view_d >= 0; view_d--) {
+      if (view_d == new_sizes_len - 1) {
+        new_strides[view_d] = 1;
+      } else {
+        new_strides[view_d] = std::max<int64_t>(new_sizes[view_d + 1], 1) *
+            new_strides[view_d + 1];
+      }
+    }
+    return new_strides;
+  }
+
+  int64_t view_d = new_sizes_len - 1;
+  int64_t chunk_base_stride = old_strides.back();
+  int64_t tensor_numel = 1;
+  int64_t view_numel = 1;
+
+  for (int64_t tensor_d = static_cast<int64_t>(old_sizes.size()) - 1;
+       tensor_d >= 0;
+       tensor_d--) {
+    tensor_numel *= old_sizes[tensor_d];
+
+    bool is_chunk_end = (tensor_d == 0) ||
+        (old_sizes[tensor_d - 1] != 1 &&
+         old_strides[tensor_d - 1] != tensor_numel * chunk_base_stride);
+
+    if (is_chunk_end) {
+      while (view_d >= 0 &&
+             (view_numel < tensor_numel || new_sizes[view_d] == 1)) {
+        new_strides[view_d] = view_numel * chunk_base_stride;
+        view_numel *= new_sizes[view_d];
+        view_d--;
+      }
+      if (view_numel != tensor_numel) {
+        return std::nullopt; // Not viewable
+      }
+      if (tensor_d > 0) {
+        chunk_base_stride = old_strides[tensor_d - 1];
+        tensor_numel = 1;
+        view_numel = 1;
+      }
+    }
+  }
+
+  if (view_d != -1) {
+    return std::nullopt; // Not viewable
+  }
+  return new_strides;
+}
+
 } // namespace executorch::backends::aoti::slim
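
For reference, a minimal usage sketch of the new permute/reshape API, mirroring the behavior exercised in test_permute_reshape.cpp above; it assumes the empty() factory, CPU_DEVICE, and namespace used in those tests:

    // Illustrative sketch only, not part of the patch.
    using namespace executorch::backends::aoti::slim;

    // Contiguous 2x3x4 float tensor on CPU, as in the test fixture.
    SlimTensor t = empty({2, 3, 4}, c10::ScalarType::Float, CPU_DEVICE);

    // permute() returns a strided view over the same storage:
    // sizes [4, 2, 3], strides [1, 12, 4].
    SlimTensor p = t.permute({2, 0, 1});

    // reshape() returns a view when compute_stride() can derive compatible
    // strides; one dimension may be -1 and is filled in by infer_size().
    SlimTensor flat = t.reshape({6, -1}); // view: sizes [6, 4], shares storage

    // For a non-viewable (non-contiguous) layout, reshape() falls back to
    // clone_contiguous() + set_sizes_contiguous(), so the result owns new storage.
    SlimTensor copied = p.reshape({24});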