
Commit 0032fa7

bdhirsh authored and facebook-github-bot committed
Add a Functionalization pass in core (#64432)
Summary:
Pull Request resolved: #64432

Original PR description + feedback here: #63048

I've addressed all of the feedback in the original PR and made some pretty large changes, listed below.

**Table of Contents**
- Starting points
- List of the main changes from the original PR
- Next Steps
- Example codegen output (for a view, mutation, and view+mutation op)

**Starting Points**

A good place to start when looking through the PR:
* Alban mentioned that this is a useful mental model (thanks Ed for originally making this clear to me). Semantically, the pass currently does THREE things, all of which are needed by functorch, fused together into one big pass (see the sketch after this list):
  * (a) Alias removal: replace `{view}` calls with `{view}_copy` calls and manually track aliasing information, so that when one tensor is mutated, we re-apply the same mutation to all of its aliases. This is the bulk of the work - once it's done, the next two steps are trivial to implement.
  * (b) Mutation removal, which is easy to do once we know there are no aliases. Every mutation `a.add_(b)` becomes `a.replace_(a.add(b))`.
  * (c) Reapplying views: all of the `{view}_copy` calls are replaced with `{view}` calls again. This is an optimization we can make specifically for functorch (and strided backends), which only care about mutation removal, not alias removal.
* XLA and Vulkan only want (a), or (a) + (b). Later, we'll want to split this out so that you can actually opt into different versions of this logic.
* There are currently no `{view}_copy` operators, because the pass's <replace views with copies> and <replace copies with views> steps have just been combined. Later, we'll want to actually implement `{view}_copy` variants of each view operator, probably with codegen.
* Documentation breadcrumb 1, in `FunctionalTensorWrapper.cpp`: https://github.com/pytorch/pytorch/pull/64432/files#diff-a0bac99bf205dba5b94cb64fc2466d3d55d991887572f9cd6a02e27b3a91dd60R59 (you might have to expand the `FunctionalTensorWrapper.cpp` file, which GitHub collapses by default because it's large)
* Documentation breadcrumb 2, in `FunctionalTensorWrapper.h`: https://github.com/pytorch/pytorch/pull/64432/files#diff-c945c71a4ccac65871f24a912e8904f9a5088b24a32e636727ea9c8fe920708aR12
* Reading through the codegen output at the bottom of this description.
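To make (a)/(b)/(c) concrete, here is a minimal sketch (not code from this PR) of the rewrites applied to a toy program, written with plain ATen ops; the out-of-place reassignments of `base` stand in for the pass's internal `replace_`/update bookkeeping:

```cpp
#include <ATen/ATen.h>

// Toy program: v = base.view({16}); v.add_(1)  (the add_ also mutates base, since v aliases it).
void pass_steps_sketch() {
  at::Tensor base = at::ones({2, 8});

  // (a) alias removal: the view becomes a copy, and the mutation is replayed
  //     back onto base through the view's inverse (for `view`, that's just another view call).
  at::Tensor v_copy = base.view({16}).clone();   // stand-in for a {view}_copy op
  v_copy.add_(1);
  base = v_copy.view({2, 8});                    // replay the update onto base

  // (b) mutation removal: every `x.add_(y)` becomes `x = x.add(y)`.
  at::Tensor v2 = base.view({16}).clone();
  v2 = v2.add(1);
  base = v2.view({2, 8});

  // (c) reapply views (functorch/strided-only optimization): the copies turn back
  //     into real views, which is safe now that no mutations remain.
  at::Tensor v3 = base.view({16});
  at::Tensor v3_new = v3.add(1);
  base = v3_new.view({2, 8});
}
```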
**Main changes from the original PR**

(1) I use lambdas instead of a giant enum to handle all of the different views. This results in less boilerplate per view op (and more stuff that can be codegen'd). Every `ViewMeta` object now contains a `forward` and `reverse` lambda that knows how to replay the view and its inverse. This makes the actual code that executes the replaying logic a lot less boilerplate-y (see `Alias::sync_update_operations` and `FunctionalTensorWrapper::sync_`).

(2) Every tensor during the functionalization pass is always wrapped in a `FunctionalTensorWrapper`. This is potentially unnecessary for Vulkan/XLA, and will have a mild perf impact, but for now this PR just targets the functorch use case. I previously had a more complicated design (a `FunctionalTensorImplBase` class) to avoid needing the wrapper for XLA, but it had some subtleties that are going to require more thought to fix, so I'm pushing that off for now.

(3) `FunctionalTensorWrapper` objects accurately report stride information. It's a little annoying to do this, though, because the logic that calculates stride info for each view isn't easily separated from the actual view kernels in core, `at::native::{view}`. I do this by adding logic in each `at::functionalization::{view}` kernel to call the reference implementation `at::native::{view}`. I don't do anything with the output aside from taking its size/stride/storage_offset to set the actual output tensor's size/stride/storage_offset correctly. There's another annoying part to this: I'm pretty sure that we want to pass the actual *wrapper* tensors directly into the native kernels, not their inner unwrapped values. But some `at::native::{view}` kernels call other tensor methods, which re-invoke the dispatcher, calling functionalization/functorch kernels that try to do the unwrapping. To handle this, right now I have an `AutoDispatchDirectlyToNative` guard that basically ensures that any tensor method called inside of the `at::native::{view}` op always redispatches straight to the CPU kernel (which will be another `at::native::` kernel). This feels kind of heavy-handed, but I'm not sure of a better way to do it.

(4) `FunctionalTensorWrapper` objects accurately report aliasing information. There's a new `FunctionalStorageImpl` class (subclass of `StorageImpl`) that allows tensors in the functionalization pass to accurately alias storage: if two tensors `a` and `b` in a functionalized program are views of one another, then `a.storage().is_alias_of(b.storage())` should return true (see the sketch below). I added this in a pretty similar way to how meta tensors allocate storage, although I don't pass in an actual allocator (I think this is fine because you should never resize a functional tensor's storage). One thing I'm not sure about: should `FunctionalTensorWrapper` set `storage_access_should_throw_` (a) always, (b) never, or (c) only if its wrapped tensor has it set? Right now I have it not set, mostly because calling the reference view functions (`at::native::{view}`) requires looking at the storage. But that means that if you try to access storage from Python in a functionalized program, you'll get silent garbage instead of an error. Related question: are we planning on exposing meta tensor storage to Python in the future (even though it contains garbage)?
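As a concrete illustration of the invariant in (4), here is a minimal sketch (not code from this PR) of what `Storage::is_alias_of` reports for ordinary strided views - the same answer a `FunctionalTensorWrapper` is now expected to give:

```cpp
#include <ATen/ATen.h>
#include <c10/util/Exception.h>

// The aliasing invariant that FunctionalStorageImpl preserves, demonstrated
// here with ordinary strided tensors.
void storage_aliasing_sketch() {
  at::Tensor a = at::ones({4, 4});
  at::Tensor b = a.view({16});            // b is a view of a
  at::Tensor c = at::ones({16});          // c is unrelated

  TORCH_CHECK(a.storage().is_alias_of(b.storage()));   // views share storage
  TORCH_CHECK(!a.storage().is_alias_of(c.storage()));  // fresh tensors don't
}
```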
(5) Better docs :)

**View operator coverage**

(6) The functionalization pass now gets math-composite view ops for free. I didn't add the `Functionalize` dispatch key to the composite set, because I don't want composite ops like `torch.ones` to get decomposed before hitting the functionalization pass. Instead, I added codegen to manually register the `at::native::` kernels of composite view ops. This is a little hairy, because the names of the `at::native::` kernels aren't easily accessible; they're stored in a `Dict[DispatchKey, BackendIndex]`. I made a best-effort attempt to get each view kernel's name, basically by assuming that every view op has either a composite or CPU implementation. There's also a hardcoded list of composite view ops in `gen_inplace_or_view_type.py`, but it looks like it's wrong. This is probably worth rationalizing later; for now I created a new list of the "complete" set of composite view ops, and preserved the old set by hardcoding the delta between the two sets.

(7) I've added codegen for ops that are both views AND mutations, like `transpose_()` (why do we even have these 😢). From some light testing, it looks like they work correctly, with one caveat: I had a hard time ensuring that functorch programs that mutate their inputs using ops like `transpose_()` preserve the input mutations after the program finishes running. For now (in my corresponding functorch branch), I emit a warning when this happens and just don't preserve the mutation.

(8) I added `{view}_inverse` implementations for every view op, in `FunctionalInverses.cpp`. These are needed to take mutations made to views and replay them back onto the base. To reduce boilerplate, the codegen generates function declarations for each `{view}_inverse` function, so you get a nice compiler error when someone eventually adds a new view op. The only view ops currently not supported are (a) as_strided, and (b) the sparse view ops (values()/indices()). I can add support for as_strided, but it needs an `as_strided_inverse()` function. That will look really similar to the `as_strided_backward()` function in FunctionsManual.cpp, but it has some noticeable differences: we basically want an `as_strided_embed` for autograd and `as_strided_scatter` for functionalization. We will also probably need them to be primitives w.r.t. autograd, since the current implementation for autograd uses `view().copy_()` calls that XLA won't be able to handle. I'm wondering if anyone has any objections; otherwise I can make those changes (which will require writing backward formulas for `as_strided_embed` and `as_strided_scatter`).

I did a bunch of manual testing that all looks pretty good, but it's definitely not fully tested. Ed pointed out that once XLA uses this pass (or at least once there's a POC), we can just run the existing XLA view test suite. Hopefully that delay is okay - if it's not, maybe we can think about using OpInfos, similar to how functorch uses them for testing.

Note: there's some duplication with autograd's view code. Every `{view}_inverse` implementation is really similar to the implementation for that view listed in `derivatives.yaml`. There are some major differences, though:
* The autograd implementations of those backward functions (like `permute_backwards()` in `FunctionsManual.cpp`) internally call other view ops. For functionalization, we want them to (eventually) call `{view}_copy` operators instead.
* For view ops that take a subset of the original storage, like `slice/select/diagonal/as_strided()`, the autograd backward functions fill the "spaces" in the inverse call with zeroes. For functionalization, we want to fill them with the value of `base` at those positions (see the sketch after this list). It looks like this currently applies to 6 total ops (since we can ignore composites):
  * select
  * slice
  * diagonal
  * as_strided
  * split
  * split_with_sizes

A nice end state would probably be for the autograd + functionalization codegen to both look at the same yaml (either `derivatives.yaml`, or something else) and automatically generate the right thing. I left that out of scope for this PR, though.
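A minimal sketch (not code from this PR) of the "zeroes vs. base" difference above, using `select` as the example view op and assuming `at::select_backward` has its usual `(grad, input_sizes, dim, index)` signature:

```cpp
#include <ATen/ATen.h>

// Autograd fills the non-selected positions with zeroes; the functionalization
// inverse fills them with the corresponding values of `base`.
void select_inverse_vs_backward_sketch() {
  at::Tensor base = at::ones({3, 4});
  at::Tensor mutated_view = base.select(/*dim=*/0, /*index=*/1) + 5;

  // Autograd: embed the row into a tensor of ZEROS shaped like base.
  at::Tensor from_autograd =
      at::select_backward(mutated_view, base.sizes(), /*dim=*/0, /*index=*/1);

  // Functionalization: embed the row into a copy of BASE itself,
  // which is exactly what select_scatter does.
  at::Tensor from_functionalization =
      base.select_scatter(mutated_view, /*dim=*/0, /*index=*/1);
}
```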
**Current State + Next Steps**

There are a bunch of followups after this PR eventually lands. Roughly in order:
* Use the current pass to register problematic composite ops in functorch. Also, nested `functionalize()` calls aren't supported yet (I mostly just need to remove some debug asserts and test it).
* Work on freeing up dispatch key space by deduplicating the `{backend}`/`Autograd{backend}`/`Sparse{backend}`/`Quantized{backend}` keys.
* Once we have more dispatch keys, split this pass up into 3 pieces - it's currently fused, and doesn't do the right thing for Vulkan/XLA. Specifically, all of the `{view}` calls in the current pass's view-replay logic should turn into `{view}_copy` calls that Vulkan/XLA know how to implement (see the hypothetical sketch after this list), and there will be separate passes for (a) removing mutations, and (b) turning `{view}_copy` calls back into `{view}` calls. For Vulkan, we eventually want a pass that ONLY removes aliasing and view calls, and doesn't remove mutations. We can also probably make the 2 new passes use user dispatch keys to save dispatch key space, if they'll only be used by functorch anyway.
* Do more of a dive on perf for the Vulkan/XLA use cases. There are several areas to improve perf, with varying levels of effort required. The simplest one, which I'll probably do regardless, is to codegen the out-of-place kernels instead of using a boxed fallback. Getting a POC working for XLA will also be useful to test the view operator coverage.
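For reference, a hypothetical sketch of what a `{view}_copy` variant could reduce to semantically; `transpose_copy_sketch` is an illustrative name, not an operator added by this PR:

```cpp
#include <ATen/ATen.h>

// Semantically, a {view}_copy operator produces a freshly-allocated tensor with
// the viewed contents, rather than an alias into self's storage. That's what
// makes it implementable by backends like Vulkan/XLA that can't represent aliasing.
at::Tensor transpose_copy_sketch(const at::Tensor& self, int64_t dim0, int64_t dim1) {
  // clone() guarantees the result owns its own storage.
  return self.transpose(dim0, dim1).clone();
}
```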
**Example Codegen Output**

View Op:
```
::std::vector<at::Tensor> split_Tensor(c10::DispatchKeySet ks, const at::Tensor & self, int64_t split_size, int64_t dim) {
  auto self_ = at::functionalization::impl::unwrapFunctionalTensor(self);
  ::std::vector<at::Tensor> out;
  {
    at::AutoDispatchBelowFunctionalize guard;
    auto tmp_output = at::redispatch::split(ks & c10::after_func_keyset, self_, split_size, dim);
    out = at::functionalization::impl::wrapFunctionalTensor(tmp_output);
    // I'm fusing the [alias removal], [mutation removal], [add views back] passes together.
    // Later, we'll want to turn them into separate passes (since e.g. vulkan only cares about alias removal).
  }
  at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta(
    [split_size, dim](const at::Tensor& base, int64_t mutated_view_idx) -> at::Tensor {
      return base.split(split_size, dim)[mutated_view_idx];
    },
    [split_size, dim](const at::Tensor& base, const at::Tensor& mutated_view, int64_t mutated_view_idx) -> at::Tensor {
      return at::functionalization::impl::split_inverse(base, mutated_view, mutated_view_idx, split_size, dim);
    }
  );
  at::functionalization::impl::set_view_meta(out, self, view_meta);
  at::AutoDispatchDirectlyToNative native_guard;
  ::std::vector<at::Tensor> reference_tensor_output = at::native::split(self, split_size, dim);
  at::functionalization::impl::set_strides(out, reference_tensor_output);
  return out;
}
```

Mutation Op:
```
at::Tensor & add__Tensor(c10::DispatchKeySet ks, at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) {
  at::functionalization::impl::sync(self);
  at::functionalization::impl::sync(other);
  auto self_ = at::functionalization::impl::unwrapFunctionalTensor(self);
  auto other_ = at::functionalization::impl::unwrapFunctionalTensor(other);
  at::Tensor tmp_output;
  {
    at::AutoDispatchBelowFunctionalize guard;
    // The functionalization pass explicitly doesn't pass out= parameters to the redispatch
    tmp_output = at::redispatch::add(ks & c10::after_func_keyset, self_, other_, alpha);
  }
  self.replace_(tmp_output);
  at::functionalization::impl::maybe_add_update(self);
  return self;
}
```

View + Mutation Op:
```
at::Tensor & transpose_(c10::DispatchKeySet ks, at::Tensor & self, int64_t dim0, int64_t dim1) {
  at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta(
    [dim0, dim1](const at::Tensor& base, int64_t mutated_view_idx) -> at::Tensor {
      return base.transpose(dim0, dim1);
    },
    [dim0, dim1](const at::Tensor& base, const at::Tensor& mutated_view, int64_t mutated_view_idx) -> at::Tensor {
      return at::functionalization::impl::transpose_inverse(base, mutated_view, dim0, dim1);
    }
  );
  at::functionalization::impl::mutate_view_meta(self, view_meta);
  // See Note [Propagating strides in the functionalization pass]
  // Directly update the sizes/strides/storage_offset fields on self using the inplace call.
  // I need the guard because I don't want the at::native kernel to end up calling more functionalization/functorch kernels.
  // Its only job is to directly compute the output size/stride/storage_offset metadata.
  at::AutoDispatchDirectlyToNative native_guard;
  at::native::transpose_(self, dim0, dim1);
  return self;
}
```

Test Plan: Imported from OSS

Reviewed By: albanD

Differential Revision: D31942093

Pulled By: bdhirsh

fbshipit-source-id: b95598dae35dd1842fa8b1d8d1448332f3afaadf
1 parent b0a8ca2 commit 0032fa7

28 files changed: +1809 −33 lines

BUILD.bazel

Lines changed: 6 additions & 0 deletions
@@ -127,6 +127,11 @@ genrule(
     "aten/src/ATen/Declarations.yaml",
     "aten/src/ATen/RegisterBackendSelect.cpp",
     "aten/src/ATen/RegisterCPU.cpp",
+    "aten/src/ATen/RegisterFunctionalization_0.cpp",
+    "aten/src/ATen/RegisterFunctionalization_1.cpp",
+    "aten/src/ATen/RegisterFunctionalization_2.cpp",
+    "aten/src/ATen/RegisterFunctionalization_3.cpp",
+    # "aten/src/ATen/RegisterFunctionalizationEverything.cpp",
     "aten/src/ATen/RegisterMkldnnCPU.cpp",
     "aten/src/ATen/RegisterQuantizedCPU.cpp",
     "aten/src/ATen/RegisterSparseCPU.cpp",
@@ -143,6 +148,7 @@ genrule(
     "aten/src/ATen/CompositeExplicitAutogradFunctions_inl.h",
     "aten/src/ATen/CompositeImplicitAutogradFunctions.h",
     "aten/src/ATen/CompositeImplicitAutogradFunctions_inl.h",
+    "aten/src/ATen/FunctionalInverses.h",
     "aten/src/ATen/Functions.h",
     "aten/src/ATen/Functions.cpp",
     "aten/src/ATen/RedispatchFunctions.h",
FunctionalInverses.cpp

Lines changed: 234 additions & 0 deletions

#include <ATen/FunctionalInverses.h>

#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>

namespace at {
namespace functionalization {

// This logic is similar to autograd code for view backwards calls.
// We can't easily share it though, because (eventually) these functions
// will all call `permute/unsqueeze_copy()` instead of `permute/unsqueeze`.

Tensor permute_inverse(const Tensor& self, IntArrayRef dims) {
  // invert the permutation
  auto ndims = dims.size();
  std::vector<int64_t> dims_(ndims);
  for(const auto i : c10::irange(ndims)) {
    dims_[at::maybe_wrap_dim(dims[i], ndims)] = i;
  }
  return self.permute(dims_);
}

Tensor unsqueeze_to(const Tensor & self, IntArrayRef sizes) {
  auto result = self;

  int64_t nDims = sizes.size();
  for(const auto dim : c10::irange(nDims)) {
    if (sizes[dim] == 1) {
      result = result.unsqueeze(dim);
    }
  }
  return result;
}

Tensor unsqueeze_to(const Tensor & self, int64_t dim, IntArrayRef sizes) {
  dim = at::maybe_wrap_dim(dim, sizes.size());
  // in NumPy it's not an error to unsqueeze a scalar, but we still need to avoid
  // unsqueezing in the backward.
  if (sizes.size() > 0 && sizes[dim] == 1) {
    return self.unsqueeze(dim);
  }
  return self;
}

// Note [Functionalization Pass: View Inverses].
// This file contains the implementation of each "view inverse".
// These aren't really true inverses in the mathematical sense: each view inverse describes how to undo
// the original view (although it takes in different arguments).
//
// E.g. Below is an example of a program that has alias operations removed, and the role that view inverses play:
//
// normal program with views and mutations:
// view1 = input1.view_op(args...)
// view1.add_(1) (perform a mutation on the view, which should also modify input)
//
// version of the program with no aliasing, that instead uses view_inverse functions:
// view_copy1 = input1.view_copy_op(args...)
// view_copy1.add_(1) (perform a mutation on view_copy1. At this point, input1 is NOT modified)
// x = view_op_inverse(input1, view_copy1, args...)
//
// at this point, input1 and x should be equal
//
// Note that input1 is also passed as an argument to view_op_inverse in the above example.
// This isn't actually required for most view operators: it's only required for view ops
// where you can't figure out what the size of the base tensor is given just the view tensor and arguments.
// Examples are slice/select/scatter/squeeze/as_strided.
// We happen to be passing in the base tensor in all cases, mostly to make the codegen simpler.
// But you'll see below that the "base" argument is ignored by most view_inverse implementations.

// ----------------------------------------------------------
// Implementations of each view_inverse() function are below.
// One of these needs to be implemented for every existing non-composite view operator.
// The codegen automatically generates the corresponding function declaration.
// ----------------------------------------------------------

Tensor FunctionalInverses::_fw_primal_inverse(const at::Tensor& base, const at::Tensor& mutated_view, int64_t level) {
  TORCH_INTERNAL_ASSERT(false, "Attempted to call _fw_primal() during the functionalization pass. For now, this is not supported.");
  return Tensor();
}

Tensor FunctionalInverses::view_as_real_inverse(const Tensor& base, const Tensor& mutated_view) {
  return at::view_as_complex(mutated_view);
}

Tensor FunctionalInverses::view_as_complex_inverse(const Tensor& base, const Tensor& mutated_view) {
  return at::view_as_real(mutated_view.resolve_conj());
}

Tensor FunctionalInverses::_conj_inverse(const Tensor& base, const Tensor& mutated_view) {
  return mutated_view.conj();
}

Tensor FunctionalInverses::_neg_view_inverse(const Tensor& base, const Tensor& mutated_view) {
  return mutated_view.neg();
}

Tensor FunctionalInverses::as_strided_inverse(const Tensor& base, const Tensor& mutated_view, at::IntArrayRef size, at::IntArrayRef stride, c10::optional<int64_t> storage_offset) {
  TORCH_INTERNAL_ASSERT(false, "as_strided has not been implemented in the functionalization pass yet");
  return Tensor();
}

Tensor FunctionalInverses::diagonal_inverse(const Tensor& base, const Tensor& mutated_view, int64_t offset, int64_t dim1, int64_t dim2) {
  return base.diagonal_scatter(mutated_view, offset, dim1, dim2);
}

Tensor FunctionalInverses::expand_inverse(const Tensor& base, const Tensor& mutated_view, at::IntArrayRef size, bool implicit) {
  return at::sum_to(mutated_view, base.sizes());
}

Tensor FunctionalInverses::permute_inverse(const Tensor& base, const Tensor& mutated_view, at::IntArrayRef dims) {
  return at::functionalization::permute_inverse(mutated_view, dims);
}

Tensor FunctionalInverses::_reshape_alias_inverse(const Tensor& base, const Tensor& mutated_view, at::IntArrayRef size, at::IntArrayRef stride) {
  // Note that I'm directly calling reshape(), and ignoring the strides.
  // _reshape_alias() isn't available from user code, and is an implementation detail of reshape().
  // Specifically, passing in the strides directly can get us into trouble in cases like:
  // b = a[0]; c = b.reshape(...); c.add_(1); print(a)
  // When we eventually run the _reshape_alias_inverse() call here, if we were to pass in both sizes and strides,
  // the call would fail because `mutated_view` doesn't have enough bytes of storage.
  return mutated_view.reshape(base.sizes());
}

Tensor FunctionalInverses::select_int_inverse(const Tensor& base, const Tensor& mutated_view, int64_t dim, int64_t index) {
  return base.select_scatter(mutated_view, dim, index);
}

Tensor FunctionalInverses::detach_inverse(const Tensor& base, const Tensor& mutated_view) {
  // the functionalization pass doesn't care about autograd metadata - as a view, I think detach() is just an identity function
  return mutated_view;
}

Tensor FunctionalInverses::slice_Tensor_inverse(const Tensor& base, const Tensor& mutated_view, int64_t dim, c10::optional<int64_t> start, c10::optional<int64_t> end, int64_t step) {
  return base.slice_scatter(mutated_view, dim, start, end, step);
}

Tensor FunctionalInverses::split_Tensor_inverse(const Tensor& base, const Tensor& mutated_view, int64_t mutated_view_idx, int64_t split_size, int64_t dim) {
  // It would be nice if this logic could be re-used from autograd's split_backward(), but I don't think it can.
  // For functionalization, we only have one of the tensors from the TensorList outputted by split(), and we want to layer it
  // on top of the base tensor.
  // For autograd, we have all of the tensors outputted by split() and we just want to stack them.
  dim = at::maybe_wrap_dim(dim, base.sizes().size());
  auto dim_size = base.size(dim);
  auto start = mutated_view_idx * split_size;
  auto end = start + split_size;
  if (end > dim_size) end = dim_size;
  return base.slice_scatter(mutated_view, dim, start, end, 1);
}

Tensor FunctionalInverses::split_with_sizes_inverse(const Tensor& base, const Tensor& mutated_view, int64_t mutated_view_idx, at::IntArrayRef split_sizes, int64_t dim) {
  dim = at::maybe_wrap_dim(dim, base.sizes().size());
  auto dim_size = base.size(dim);
  int64_t start = 0;
  for (auto i = 0; i < mutated_view_idx; ++i) {
    start += split_sizes[i];
  }
  auto end = start + split_sizes[mutated_view_idx];
  if (end > dim_size) end = dim_size;
  return base.slice_scatter(mutated_view, dim, start, end, 1);
}

Tensor FunctionalInverses::squeeze_inverse(const Tensor& base, const Tensor& mutated_view) {
  return unsqueeze_to(mutated_view, base.sizes());
}

Tensor FunctionalInverses::squeeze_dim_inverse(const Tensor& base, const Tensor& mutated_view, int64_t dim) {
  return unsqueeze_to(mutated_view, dim, base.sizes());
}

Tensor FunctionalInverses::t_inverse(const Tensor& base, const Tensor& mutated_view) {
  return mutated_view.t();
}

Tensor FunctionalInverses::transpose_int_inverse(const Tensor& base, const Tensor& mutated_view, int64_t dim0, int64_t dim1) {
  return mutated_view.transpose(dim0, dim1);
}

Tensor FunctionalInverses::unsqueeze_inverse(const Tensor& base, const Tensor& mutated_view, int64_t dim) {
  return mutated_view.squeeze(dim);
}

Tensor FunctionalInverses::_indices_inverse(const Tensor& base, const Tensor& mutated_view) {
  TORCH_INTERNAL_ASSERT(false, "Attempted to call _indices() during the functionalization pass. For now, sparse tensors aren't supported during functionalization");
  return Tensor();
}

Tensor FunctionalInverses::_values_inverse(const Tensor& base, const Tensor& mutated_view) {
  TORCH_INTERNAL_ASSERT(false, "Attempted to call _values() during the functionalization pass. For now, sparse tensors aren't supported during functionalization");
  return Tensor();
}

Tensor FunctionalInverses::indices_inverse(const Tensor& base, const Tensor& mutated_view) {
  TORCH_INTERNAL_ASSERT(false, "Attempted to call indices() during the functionalization pass. For now, sparse tensors aren't supported during functionalization");
  return Tensor();
}

Tensor FunctionalInverses::values_inverse(const Tensor& base, const Tensor& mutated_view) {
  TORCH_INTERNAL_ASSERT(false, "Attempted to call values() during the functionalization pass. For now, sparse tensors aren't supported during functionalization");
  return Tensor();
}

Tensor FunctionalInverses::crow_indices_inverse(const at::Tensor& base, const at::Tensor& mutated_view) {
  TORCH_INTERNAL_ASSERT(false, "Attempted to call crow_indices() during the functionalization pass. For now, sparse tensors aren't supported during functionalization");
  return Tensor();
}

Tensor FunctionalInverses::col_indices_inverse(const at::Tensor& base, const at::Tensor& mutated_view) {
  TORCH_INTERNAL_ASSERT(false, "Attempted to call col_indices() during the functionalization pass. For now, sparse tensors aren't supported during functionalization");
  return Tensor();
}

Tensor FunctionalInverses::unbind_int_inverse(const Tensor& base, const Tensor& mutated_view, int64_t mutated_view_idx, int64_t dim) {
  dim = at::maybe_wrap_dim(dim, base.sizes().size());
  return base.select_scatter(mutated_view, dim, mutated_view_idx);
}

Tensor FunctionalInverses::view_inverse(const Tensor& base, const Tensor& mutated_view, at::IntArrayRef size) {
  return mutated_view.view(base.sizes());
}

Tensor FunctionalInverses::view_dtype_inverse(const Tensor& base, const Tensor& mutated_view, at::ScalarType dtype) {
  return mutated_view.view(base.scalar_type());
}

Tensor FunctionalInverses::unfold_inverse(const Tensor& base, const Tensor& mutated_view, int64_t dimension, int64_t size, int64_t step) {
  // I think autograd and the functionalization pass want the exact same thing here, but need to test to confirm.
  return unfold_backward(mutated_view, base.sizes(), dimension, size, step);
}

Tensor FunctionalInverses::alias_inverse(const Tensor& base, const Tensor& mutated_view) {
  return mutated_view;
}

} // functionalization
} // at
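A minimal usage sketch (not part of the diff) of what `slice_Tensor_inverse` above is expected to compute, written with plain ATen ops:

```cpp
#include <ATen/ATen.h>

// slice_scatter embeds the mutated slice back into base at the positions the slice came from.
void slice_inverse_sketch() {
  at::Tensor base = at::zeros({4, 4});
  at::Tensor view = base.slice(/*dim=*/0, /*start=*/1, /*end=*/3, /*step=*/1);
  at::Tensor mutated_view = view + 1;  // out-of-place stand-in for view.add_(1)

  at::Tensor mutated_base =
      base.slice_scatter(mutated_view, /*dim=*/0, /*start=*/1, /*end=*/3, /*step=*/1);
  // mutated_base matches what `base` would look like if view.add_(1) had aliased it.
}
```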
FunctionalStorageImpl.cpp

Lines changed: 117 additions & 0 deletions

#include <ATen/FunctionalStorageImpl.h>

#include <ATen/FunctionalTensorWrapper.h>
#include <ATen/core/LegacyTypeDispatch.h>
#include <c10/util/Exception.h>
#include <vector>

namespace at {
namespace functionalization {

ViewMeta ViewMeta::to_out_idx(int64_t out_idx) {
  if (out_idx == this->out_index) return *this;
  return ViewMeta(forward_fn, reverse_fn, out_idx);
}

Alias::Alias(const at::Tensor& base) {
  TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(base));
  base_ = base;
}

const at::Tensor& Alias::base() const {
  return base_;
}

void Alias::add_update(const at::Tensor& updated_val, const std::vector<ViewMeta>& metas) {
  updates_.push_back({updated_val, metas});
  generation_++;
}

// Note [Functionalization: Alias Removal Part 2]
// See Note [Functionalization: Alias Removal] for more details.
// This function applies a single update from one of the views to the Alias object.
// We start out with <original_base> and <mutated_view>, and our goal is to end up with <mutated_base>.
// Consider this program:
//
// base = ...
// a = base.view1()
// b = a.view2()
// c = b.view3()
// c.add_(3)
//
// Then the functionalization pass will queue an update as follows:
//
// update.new_val = c # the updated value of c
// update.view_metas = [view1_meta, view2_meta, view3_meta]
//
// Syncing any of a, b or c will eventually call apply_update() on the alias, and the following will run:
//
// tmp_values = [base, a, b] # NB: c is not necessary
// t = update.new_val
// t = view3_inverse(b, t, 0) # 0 is output index, these are all single output views so it's 0
// t = view2_inverse(a, t, 0)
// t = view1_inverse(base, t, 0) # t now represents the updated alias.
// alias.base_ = t
const Tensor apply_update(const Alias::Update& update, const Tensor& base) {
  at::Tensor t = update.new_val;
  TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
  std::vector<at::Tensor> tmp_values({base});
  for (size_t i = 0; i < update.view_metas.size() - 1; ++i) {
    at::Tensor next_view = update.view_metas[i].forward_fn(tmp_values.back(), update.view_metas[i].out_index);
    // NB: We only actually need tmp_values for ops like select/slice/diagonal/squeeze/as_strided.
    // All of these ops require additional information to recover the sizes of the original tensor.
    // If we need to, we could probably apply this optimization and only bother computing tmp_values
    // for those necessary view ops.
    tmp_values.push_back(std::move(next_view));
  }
  for(int i = update.view_metas.size()-1; i >= 0; --i) {
    int64_t out_idx = update.view_metas[i].out_index;
    // Each view inverse is implemented in FunctionalInverses.cpp.
    t = update.view_metas[i].reverse_fn(tmp_values[i], t, out_idx);
  }
  TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
  return t;
}

void Alias::apply_updates() {
  // N.B.: none of the tensors used in this function should be FunctionalTensorWrappers at this point.
  // The only reason we currently need the TLS exclude guard here is because of functorch's DynamicLayer stack.
  // It adds the Functionalize key into TLS before redispatching to the functionalization kernels,
  // which means that we need to explicitly exclude it here before doing any other work underneath the pass.
  at::AutoDispatchSkipFunctionalize guard;
  for (auto& update_data: updates_) {
    base_ = apply_update(update_data, base_);
  }
  updates_.clear();
}

FunctionalStorageImpl::FunctionalStorageImpl(const Tensor& value)
  : c10::StorageImpl(
      c10::StorageImpl::use_byte_size_t(),
      value.numel() * value.dtype().itemsize(),
      DataPtr{nullptr, value.device()},
      // Using a null allocator, since FunctionalTensorImpls aren't resizeable.
      nullptr,
      /*resizeable=*/false
    ),
    alias_(Alias(value))
  {}

void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<ViewMeta>& view_metas) {
  alias_.add_update(updated_val, view_metas);
}

void FunctionalStorageImpl::apply_updates() {
  alias_.apply_updates();
}

const Tensor& FunctionalStorageImpl::base() {
  return alias_.base();
}

size_t FunctionalStorageImpl::generation() const {
  return alias_.generation();
}

} // namespace functionalization
} // namespace at
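A minimal sketch (not part of the diff) of the replay that `apply_update` performs, unrolled by hand for a two-view chain using plain ATen ops:

```cpp
#include <ATen/ATen.h>

// Hand-unrolled version of apply_update() for the chain:
//   a = base.transpose(0, 1); c = a.select(0, 2); c.add_(3)
void apply_update_sketch() {
  at::Tensor base = at::zeros({3, 4});
  at::Tensor a = base.transpose(0, 1);          // forward_fn of view1
  at::Tensor c_new = a.select(0, 2) + 3;        // update.new_val (mutated leaf view)

  // tmp_values = [base, a]; replay the inverses bottom-up, like the reverse_fn loop:
  at::Tensor t = a.select_scatter(c_new, 0, 2); // select_int_inverse(a, c_new, ...)
  at::Tensor mutated_base = t.transpose(0, 1);  // transpose_int_inverse(base, t, 0, 1)
  // mutated_base is what base would contain if c.add_(3) had gone through the aliases.
}
```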
