diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 48d5d40..342cf98 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -3,12 +3,10 @@ name: CI -on: - pull_request: - branches: [ "main" ] +on: [push, pull_request] env: - GO_VERSION: "1.22.5" + GO_VERSION: "1.24.6" jobs: golangci: @@ -19,10 +17,11 @@ jobs: - uses: actions/setup-go@v5 with: go-version: ${{ env.GO_VERSION }} + - run: go mod tidy - name: golangci-lint uses: golangci/golangci-lint-action@v6 with: - version: v1.60 + version: latest build: env: diff --git a/cmd/main.go b/cmd/main.go index 72aed00..c77c244 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -23,6 +23,7 @@ import ( "os" "syscall" + "github.com/Project-HAMi/HAMi/pkg/util/client" "github.com/Project-HAMi/ascend-device-plugin/internal" "github.com/Project-HAMi/ascend-device-plugin/internal/manager" "github.com/Project-HAMi/ascend-device-plugin/internal/server" @@ -136,6 +137,7 @@ func main() { if err != nil { klog.Fatalf("init PluginServer failed, error is %v", err) } + client.InitGlobalClient() err = start(server) if err != nil { diff --git a/config.yaml b/config.yaml index 26de5bd..945e692 100644 --- a/config.yaml +++ b/config.yaml @@ -57,3 +57,23 @@ vnpus: memory: 12288 aiCore: 4 aiCPU: 4 +- chipName: 910ProB + commonWord: Ascend910ProB + resourceName: huawei.com/Ascend910ProB + resourceMemoryName: huawei.com/Ascend910ProB-memory + memoryAllocatable: 32768 + memoryCapacity: 32768 + aiCore: 30 + templates: + - name: vir02 + memory: 2184 + aiCore: 2 + - name: vir04 + memory: 4369 + aiCore: 4 + - name: vir08 + memory: 8738 + aiCore: 8 + - name: vir16 + memory: 17476 + aiCore: 16 \ No newline at end of file diff --git a/go.mod b/go.mod index d81d606..20d9d05 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/Project-HAMi/ascend-device-plugin -go 1.22.2 +go 1.24.6 require ( github.com/Project-HAMi/HAMi v0.0.0 @@ -57,6 +57,6 @@ require ( ) replace ( - github.com/Project-HAMi/HAMi v0.0.0 => github.com/Project-HAMi/HAMi v0.0.0-20250107033239-d04fc8baaad6 + github.com/Project-HAMi/HAMi v0.0.0 => github.com/Project-HAMi/HAMi v0.0.0-20250901013025-61c6cbe7d480 huawei.com/npu-exporter/v6 => gitee.com/ascend/ascend-npu-exporter/v6 v6.0.0-RC3 ) diff --git a/internal/server/server.go b/internal/server/server.go index e67ff2a..73b49ed 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -23,8 +23,10 @@ import ( "net" "os" "path" + "strings" "time" + "github.com/Project-HAMi/HAMi/pkg/device" "github.com/Project-HAMi/HAMi/pkg/device/ascend" "github.com/Project-HAMi/HAMi/pkg/util" "github.com/Project-HAMi/HAMi/pkg/util/nodelock" @@ -40,7 +42,8 @@ import ( const ( // RegisterAnnos = "hami.io/node-register-ascend" // PodAllocAnno = "huawei.com/AscendDevices" - NodeLockAscend = "hami.io/mutex.lock" + NodeLockAscend = "hami.io/mutex.lock" + Ascend910Prefix = "Ascend910" ) var ( @@ -188,12 +191,19 @@ func (ps *PluginServer) registerKubelet() error { return nil } +func (ps *PluginServer) getDeviceNetworkID(idx int) (int, error) { + if idx > 3 { + return 1, nil + } + return 0, nil +} + func (ps *PluginServer) registerHAMi() error { devs := ps.mgr.GetDevices() - apiDevices := make([]*util.DeviceInfo, 0, len(devs)) + apiDevices := make([]*device.DeviceInfo, 0, len(devs)) // hami currently believes that the index starts from 0 and is continuous. for i, dev := range devs { - apiDevices = append(apiDevices, &util.DeviceInfo{ + device := &device.DeviceInfo{ Index: uint(i), ID: dev.UUID, Count: int32(ps.mgr.VDeviceCount()), @@ -202,10 +212,20 @@ func (ps *PluginServer) registerHAMi() error { Type: ps.mgr.CommonWord(), Numa: 0, Health: dev.Health, - }) + } + if strings.HasPrefix(device.Type, Ascend910Prefix) { + NetworkID, err := ps.getDeviceNetworkID(i) + if err != nil { + return fmt.Errorf("get networkID error: %v", err) + } + device.CustomInfo = map[string]any{ + "NetworkID": NetworkID, + } + } + apiDevices = append(apiDevices, device) } annos := make(map[string]string) - annos[ps.registerAnno] = util.MarshalNodeDevices(apiDevices) + annos[ps.registerAnno] = device.MarshalNodeDevices(apiDevices) annos[ps.handshakeAnno] = "Reported_" + time.Now().Add(time.Duration(*reportTimeOffset)*time.Second).Format("2006.01.02 15:04:05") node, err := util.GetNode(ps.nodeName) if err != nil {