diff --git a/adaptor/codegen/gen.py b/adaptor/codegen/gen.py
index af8727d8b..1f9319b12 100644
--- a/adaptor/codegen/gen.py
+++ b/adaptor/codegen/gen.py
@@ -196,7 +196,7 @@ def prepare() -> Tuple[dict, str]:
     impl_plugin = options.impl_plugin
     base_device = options.base_device
-    assert(base_device is None or base_device == "" or base_device == "torch", f"invalid base_device:{base_device}")
+    assert base_device is None or base_device == "" or base_device == "torch", f"invalid base_device:{base_device}"
     if base_device == "":
         base_device = None
 
     def create_if_not_exist(name):
diff --git a/impl/ascend/device_configs.py b/impl/ascend/device_configs.py
index c620fd4b7..e2055425e 100755
--- a/impl/ascend/device_configs.py
+++ b/impl/ascend/device_configs.py
@@ -898,15 +898,26 @@
         para=dict(
             accumulate=[Skip(False),],
         ),
+        tensor_para=dict(
+            args=[
+                {
+                    "ins": ['input'],
+                    "shape": [Skip((16, 4, 4)),],
+                },
+            ]
+        ),
     ),
 
     'index_put_bool_indices_value': dict(  # llm used
         name=['index_put'],
+        para=dict(
+            accumulate=[Skip(False),],
+        ),
         tensor_para=dict(
             args=[
                 {
                     "ins": ['input'],
-                    "shape": [Skip((3, 2, 2, 20)),],
+                    "shape": [Skip((3, 2, 2, 20)), Skip((4, 2, 2, 6, 2))],
                 },
             ]
         ),
diff --git a/impl/ascend/functions/index.cpp b/impl/ascend/functions/index.cpp
index bcd372d41..e88082996 100644
--- a/impl/ascend/functions/index.cpp
+++ b/impl/ascend/functions/index.cpp
@@ -12,7 +12,8 @@
 namespace impl {
 namespace ascend {
 
-static std::vector<AscendTensor> castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector<AscendTensor>& indices) {
+namespace indexProcess {
+std::vector<AscendTensor> castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector<AscendTensor>& indices) {
     std::vector<AscendTensor> result;
     for (auto& t : indices) {
         if (!t.defined()) {
@@ -37,7 +38,7 @@ static std::vector castIntIndicesToLongIndices(diopiContextHandle_
     return result;
 }
 
-static void checkIndexTensorTypes(const std::vector<AscendTensor>& indices) {
+void checkIndexTensorTypes(const std::vector<AscendTensor>& indices) {
     for (const auto& t : indices) {
         if (t.defined()) {
             diopiDtype_t type = t.dtype();
@@ -47,7 +48,7 @@ static void checkIndexTensorTypes(const std::vector& indices) {
     }
 }
 
-static AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& self) {
+AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& self) {
     int64_t numELem = self.numel() * self.dim();
     std::vector<int64_t> nShape{self.numel(), self.dim()};
     std::vector<int64_t> nStride(nShape.size(), 1);
@@ -86,14 +87,14 @@ static AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor&
     return AscendTensor(nzTrans);
 }
 
-static std::vector<AscendTensor> expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector<AscendTensor>& indices) {
+std::vector<AscendTensor> expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector<AscendTensor>& indices) {
     std::vector<AscendTensor> result;
     for (auto& t : indices) {
         if (!t.defined()) {
             result.push_back(t);
         } else {
             if (t.dtype() == diopi_dtype_uint8 || t.dtype() == diopi_dtype_bool) {
-                ASCEND_CHECK(t.dtype() != diopi_dtype_uint8,
+                ASCEND_CHECK(t.dtype() == diopi_dtype_bool,
                              "indexing with dtype torch.uint8 is now deprecated,"
                              " please use a dtype torch.bool instead.");
                 for (uint64_t j = 0; j < static_cast<uint64_t>(t.dim()); j++) {
@@ -132,7 +133,7 @@ static std::vector expandIndicesTensors(diopiContextHandle_t ctx,
     return result;
 }
 
-static aclTensor* createEmptyAclTensor() {
+aclTensor* createEmptyAclTensor() {
     std::vector<int64_t> nShape{0};
     std::vector<int64_t> nStride{1};
     int64_t storageSize = 0;
@@ -167,7 +168,7 @@ static std::vector indicesExpandedOutplace(std::vector
     return result;
 }
 
-static bool hasContiguousSubspace(std::vector<AscendTensor> indices) {  // true if all the non-null tensors are adjacent
+bool hasContiguousSubspace(std::vector<AscendTensor> indices) {  // true if all the non-null tensors are adjacent
     auto isDefined = [](const AscendTensor& tensor) { return tensor.defined(); };
     auto isNull = [](const AscendTensor& tensor) { return !tensor.defined(); };
     auto start = std::find_if(indices.begin(), indices.end(), isDefined);
@@ -176,7 +177,7 @@ static bool hasContiguousSubspace(std::vector indices) { // true
     return it == stop.base();
 }
 
-static std::tuple<AscendTensor, std::vector<AscendTensor>> transposeToFront(AscendTensor self, std::vector<AscendTensor> indices) {
+std::tuple<AscendTensor, std::vector<AscendTensor>> transposeToFront(AscendTensor self, std::vector<AscendTensor> indices) {
     std::vector<int64_t> dims;
     std::vector<AscendTensor> transposedIndices;
 
@@ -198,7 +199,7 @@ static std::tuple> transposeToFront(Asce
     return std::make_tuple(self.permute(dims), transposedIndices);
 }
 
-static std::vector<int64_t> indexReshape(std::vector<AscendTensor> endIndices, int64_t dimsBefore, int64_t dimsAfter) {
+std::vector<int64_t> indexReshape(std::vector<AscendTensor> endIndices, int64_t dimsBefore, int64_t dimsAfter) {
     std::vector<int64_t> indexShape;
     for (auto& idx : endIndices) {
         if (idx.defined()) {
@@ -216,7 +217,7 @@ static std::vector indexReshape(std::vector endIndices, i
     return indexShape;
 }
 
-static std::vector<int64_t> indexOutputSize(const AscendTensor& self, std::vector<AscendTensor>& indices) {
+std::vector<int64_t> indexOutputSize(const AscendTensor& self, std::vector<AscendTensor>& indices) {
     std::vector<AscendTensor> midIndices = indicesExpandedOutplace(indices);
     while (midIndices.size() < (size_t)self.dim()) {
         midIndices.emplace_back(nullptr);
@@ -269,6 +270,8 @@ static std::vector indexOutputSize(const AscendTensor& self, std::vecto
     return outputSize;
 }
 
+} // namespace indexProcess
+
 diopiError_t diopiIndex(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t* indices, int64_t nums) {
     AscendTensor inputAt(input);
     std::vector<AscendTensor> indicesOrigin(nums);
@@ -278,12 +281,13 @@ diopiError_t diopiIndex(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diop
         }
     }
 
-    std::vector<AscendTensor> indicesList = castIntIndicesToLongIndices(ctx, indicesOrigin);
-    checkIndexTensorTypes(indicesList);
+    std::vector<AscendTensor> indicesList = indexProcess::castIntIndicesToLongIndices(ctx, indicesOrigin);
+    indexProcess::checkIndexTensorTypes(indicesList);
 
-    auto indicesExpanded = expandIndicesTensors(ctx, inputAt, indicesList);
+    auto indicesExpanded = indexProcess::expandIndicesTensors(ctx, inputAt, indicesList);
 
     std::vector<aclTensor*> allDefinedIndices;
+
     for (const auto& idx : indicesExpanded) {
         if (idx.defined()) {
             allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx));
@@ -293,8 +297,7 @@ diopiError_t diopiIndex(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diop
         }
     }
 
-    std::vector<int64_t> outShape = indexOutputSize(inputAt, indicesExpanded);
-
+    std::vector<int64_t> outShape = indexProcess::indexOutputSize(inputAt, indicesExpanded);
     diopiSize_t outSize = vectorToDiopiSize(outShape);
 
     diopiRequireTensor(ctx, out, &outSize, nullptr, inputAt.dtype(), diopi_device);
diff --git a/impl/ascend/functions/index_put.cpp b/impl/ascend/functions/index_put.cpp
index 3b01d6cfd..1354d8d0e 100755
--- a/impl/ascend/functions/index_put.cpp
+++ b/impl/ascend/functions/index_put.cpp
@@ -9,18 +9,72 @@
 namespace impl {
 namespace ascend {
 
+
+namespace indexProcess {
+extern std::vector<AscendTensor> castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector<AscendTensor>& indices);
+extern void checkIndexTensorTypes(const std::vector<AscendTensor>& indices);
+extern std::vector<AscendTensor> expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector<AscendTensor>& indices);
+extern aclTensor* createEmptyAclTensor();
+} // namespace indexProcess
+
 diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t values,
                            diopiConstTensorHandle_t* indices, int64_t indicesCounts, bool accumulate) {
     diopiCopyInp(ctx, input, out);
-    std::vector<diopiConstTensorHandle_t> indicesVec(indices, indices + indicesCounts);
-    DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, out, indicesVec, values, accumulate, false);
+    AscendTensor inputAt(input);
+    AscendTensor valuesAt(values);
+    if (inputAt.numel() == 0 || valuesAt.numel() == 0) {
+        return diopiSuccess;
+    }
+    std::vector<AscendTensor> indicesOrigin(indicesCounts);
+    for (int64_t i = 0; i < indicesCounts; i++) {
+        if (indices[i] != nullptr) {
+            indicesOrigin[i] = AscendTensor(indices[i]);
+        }
+    }
+    std::vector<AscendTensor> indicesList = indexProcess::castIntIndicesToLongIndices(ctx, indicesOrigin);
+    indexProcess::checkIndexTensorTypes(indicesList);
+    auto indicesExpanded = indexProcess::expandIndicesTensors(ctx, inputAt, indicesList);
+    std::vector<aclTensor*> allDefinedIndices;
+    auto emptyTensor = indexProcess::createEmptyAclTensor();
+    for (const auto& idx : indicesExpanded) {
+        if (idx.defined()) {
+            allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx));
+        } else {
+            allDefinedIndices.push_back(emptyTensor);
+        }
+    }
+
+    DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, out, allDefinedIndices, values, accumulate, false);
     return diopiSuccess;
 }
 
 diopiError_t diopiIndexPutInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t values, diopiConstTensorHandle_t* indices,
                               int64_t indicesCounts, bool accumulate) {
-    std::vector<diopiConstTensorHandle_t> indicesVec(indices, indices + indicesCounts);
-    DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, input, indicesVec, values, accumulate, false);
+    AscendTensor inputAt(input);
+    AscendTensor valuesAt(values);
+    if (inputAt.numel() == 0 || valuesAt.numel() == 0) {
+        return diopiSuccess;
+    }
+    std::vector<AscendTensor> indicesOrigin(indicesCounts);
+    for (int64_t i = 0; i < indicesCounts; i++) {
+        if (indices[i] != nullptr) {
+            indicesOrigin[i] = AscendTensor(indices[i]);
+        }
+    }
+    std::vector<AscendTensor> indicesList = indexProcess::castIntIndicesToLongIndices(ctx, indicesOrigin);
+    indexProcess::checkIndexTensorTypes(indicesList);
+    auto indicesExpanded = indexProcess::expandIndicesTensors(ctx, inputAt, indicesList);
+    std::vector<aclTensor*> allDefinedIndices;
+    auto emptyTensor = indexProcess::createEmptyAclTensor();
+    for (const auto& idx : indicesExpanded) {
+        if (idx.defined()) {
+            allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx));
+        } else {
+            allDefinedIndices.push_back(emptyTensor);
+        }
+    }
+
+    DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, input, allDefinedIndices, values, accumulate, false);
    return diopiSuccess;
 }
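The index_put changes reuse the helpers that index.cpp now wraps in the `indexProcess` namespace, and index_put.cpp reaches them through repeated `extern` declarations rather than a shared header. Below is a minimal, self-contained sketch of that cross-translation-unit pattern; the names and the `int32_t`-to-`int64_t` conversion are illustrative placeholders, not the DIOPI API, and the two "files" are collapsed into one unit so the example compiles standalone.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// --- corresponds to index.cpp: the helper is *defined* inside the namespace ---
namespace indexProcess {
std::vector<int64_t> castIntIndicesToLongIndices(const std::vector<int32_t>& indices) {
    std::vector<int64_t> result;
    result.reserve(indices.size());
    for (int32_t v : indices) {
        result.push_back(static_cast<int64_t>(v));  // widen every index to 64-bit
    }
    return result;
}
}  // namespace indexProcess

// --- corresponds to index_put.cpp: only a matching extern declaration is repeated ---
namespace indexProcess {
extern std::vector<int64_t> castIntIndicesToLongIndices(const std::vector<int32_t>& indices);
}  // namespace indexProcess

int main() {
    // A caller in the second "file" uses the helper through its namespace-qualified name.
    std::vector<int32_t> raw{0, 2, 5};
    std::vector<int64_t> widened = indexProcess::castIntIndicesToLongIndices(raw);
    std::cout << "converted " << widened.size() << " indices\n";
    return 0;
}
```

The declarations in the second file have to match the definitions exactly; a mismatch still compiles but fails at link time, which is the usual argument for moving such shared helpers into a common header under impl/ascend instead of repeating the prototypes.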