Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ jobs:
go-version: ${{ env.GO_VERSION }}
- run: go mod download github.com/Project-HAMi/HAMi
- run: go get github.com/Project-HAMi/ascend-device-plugin/internal/server
- run: go get huawei.com/npu-exporter
- run: go get huawei.com/npu-exporter/utils/logger@v0.0.0-00010101000000-000000000000
- name: golangci-lint
uses: golangci/golangci-lint-action@v6
with:
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "mind-cluster"]
path = mind-cluster
url = https://gitcode.com/Ascend/mind-cluster.git
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ WORKDIR /build
ADD . .
RUN go mod download github.com/Project-HAMi/HAMi
RUN go get github.com/Project-HAMi/ascend-device-plugin/internal/server
RUN go get huawei.com/npu-exporter
RUN go get huawei.com/npu-exporter/utils/logger@v0.0.0-00010101000000-000000000000
RUN make all

FROM $BASE_IMAGE
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ Memory slicing is supported based on virtualization template, lease available te

[ascend-docker-runtime](https://gitcode.com/Ascend/mind-cluster/tree/master/component/ascend-docker-runtime)

```bash
git submodule add https://gitcode.com/Ascend/mind-cluster.git
```

## Compile

```bash
Expand Down
5 changes: 5 additions & 0 deletions README_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ Ascend device plugin 是用来支持在 [HAMi](https://github.com/Project-HAMi/H

部署 [ascend-docker-runtime](https://gitcode.com/Ascend/mind-cluster/tree/master/component/ascend-docker-runtime)

克隆子模块 mind-cluster
```bash
git submodule add https://gitcode.com/Ascend/mind-cluster.git
```

## 编译

```bash
Expand Down
13 changes: 6 additions & 7 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package main

import (
"context"
"flag"
"fmt"
"os"
Expand All @@ -29,7 +28,7 @@ import (
"github.com/Project-HAMi/ascend-device-plugin/internal/server"
"github.com/Project-HAMi/ascend-device-plugin/version"
"github.com/fsnotify/fsnotify"
"huawei.com/npu-exporter/v6/common-utils/hwlog"
"huawei.com/npu-exporter/utils/logger"
"k8s.io/klog/v2"
"k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
)
Expand Down Expand Up @@ -117,11 +116,11 @@ func main() {
checkFlags()
klog.Infof("version: %s", version.GetVersion())
klog.Infof("using config file: %s", *configFile)
config := &hwlog.LogConfig{
OnlyToStdout: true,
LogLevel: *hwLoglevel,
}
err := hwlog.InitRunLogger(config, context.Background())

logger.HwLogConfig.OnlyToStdout = true
logger.HwLogConfig.LogLevel = *hwLoglevel

err := logger.InitLogger("Prometheus")
if err != nil {
klog.Fatalf("init huawei run logger failed, %v", err)
}
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ module github.com/Project-HAMi/ascend-device-plugin
go 1.22.2

require (
ascend-common v0.0.0
github.com/Project-HAMi/HAMi v0.0.0
github.com/fsnotify/fsnotify v1.7.0
google.golang.org/grpc v1.63.2
huawei.com/npu-exporter/v6 v6.0.0-RC3.b001
k8s.io/api v0.29.3
k8s.io/apimachinery v0.29.3
k8s.io/klog/v2 v2.120.1
Expand Down Expand Up @@ -57,6 +57,7 @@ require (
)

replace (
ascend-common => ./mind-cluster/component/ascend-common
github.com/Project-HAMi/HAMi v0.0.0 => github.com/Project-HAMi/HAMi v0.0.0-20250901013025-61c6cbe7d480
huawei.com/npu-exporter/v6 => gitee.com/ascend/ascend-npu-exporter/v6 v6.0.0-RC3
huawei.com/npu-exporter => ./mind-cluster/component/npu-exporter
)
7 changes: 4 additions & 3 deletions internal/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ import (
"fmt"
"sort"

"ascend-common/devmanager"
"ascend-common/devmanager/dcmi"

"github.com/Project-HAMi/ascend-device-plugin/internal"
"huawei.com/npu-exporter/v6/devmanager"
"huawei.com/npu-exporter/v6/devmanager/dcmi"
"k8s.io/klog/v2"
)

Expand All @@ -45,7 +46,7 @@ type AscendManager struct {
}

func NewAscendManager() (*AscendManager, error) {
mgr, err := devmanager.AutoInit("")
mgr, err := devmanager.AutoInit("", 30)
if err != nil {
return nil, err
}
Expand Down
11 changes: 9 additions & 2 deletions internal/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ const (
// PodAllocAnno = "huawei.com/AscendDevices"
NodeLockAscend = "hami.io/mutex.lock"
Ascend910Prefix = "Ascend910"
Ascend910CType = "Ascend910C"
)

var (
Expand Down Expand Up @@ -191,10 +192,16 @@ func (ps *PluginServer) registerKubelet() error {
return nil
}

func (ps *PluginServer) getDeviceNetworkID(idx int) (int, error) {
// getDeviceNetworkID maps a device index and device type to a network ID.
//
// Devices whose type equals Ascend910CType always get network ID 0. For any
// other type, indices greater than 3 get network ID 1 and all remaining
// indices get network ID 0. The returned error is always nil.
func (ps *PluginServer) getDeviceNetworkID(idx int, deviceType string) (int, error) {
	switch {
	case deviceType == Ascend910CType:
		// For Ascend910C devices, all modules (dies) are interconnected via
		// HCCS, so they share a single network ID regardless of index.
		return 0, nil
	case idx > 3:
		return 1, nil
	default:
		return 0, nil
	}
}

Expand All @@ -214,7 +221,7 @@ func (ps *PluginServer) registerHAMi() error {
Health: dev.Health,
}
if strings.HasPrefix(device.Type, Ascend910Prefix) {
NetworkID, err := ps.getDeviceNetworkID(i)
NetworkID, err := ps.getDeviceNetworkID(i, device.Type)
if err != nil {
return fmt.Errorf("get networkID error: %v", err)
}
Expand Down
1 change: 1 addition & 0 deletions mind-cluster
Submodule mind-cluster added at c9cf42
Loading