diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..94f480d
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+* text=auto eol=lf
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..35798f4
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,96 @@
+name: 🐞 Bug report
+description: Create a report to help us reproduce and fix the bug
+title: "[Bug] "
+labels: ['bug']
+
+body:
+- type: checkboxes
+ attributes:
+ label: Checklist
+ options:
+ - label: 1. I have searched related issues but cannot get the expected help.
+ - label: 2. The bug has not been fixed in the latest version.
+ - label: 3. I have read [Contributing Guidelines](https://github.com/xing-yiren/course_organization_template/wiki/%F0%9F%A7%AD-Contributing-Guidelines:-Issues-&-Pull-Requests).
+ - label: 4. Please note that if the bug-related issue you submitted lacks corresponding environment info and a minimal reproducible demo, it will be challenging for us to reproduce and resolve the issue, reducing the likelihood of receiving feedback.
+ - label: 5. If the issue you raised is not a bug but a question, please raise a discussion at [Discussions](https://github.com/xing-yiren/course_organization_template/discussions/new/choose). Otherwise, it will be closed.
+- type: textarea
+ attributes:
+ label: Describe the bug
+ description: |
+ A clear and concise description of what the bug is, including:
+ - What happened (actual behavior)
+ - What you expected (expected outcome)
+ - **Please attach error logs or screenshots if possible**—this helps us locate the issue faster.
+ validations:
+ required: true
+- type: textarea
+ attributes:
+ label: Reproduction
+ description: |
+ What command or script did you run? Please list the exact steps to reproduce the bug.
+ placeholder: |
+ A placeholder for the command.
+ validations:
+ required: true
+- type: checkboxes
+ id: hardware
+ attributes:
+ label: Hardware Environment
+ description: Which hardware type does this bug relate to (select all that apply)?
+ options:
+ - label: Ascend
+ - label: GPU
+ - label: CPU
+ validations:
+ required: true
+- type: input
+ attributes:
+ label: OS Platform
+ description: Which operating system are you using? (e.g., Ubuntu 20.04)
+ placeholder: "Ubuntu 20.04"
+ validations:
+ required: true
+- type: input
+ attributes:
+ label: Python Version
+ description: What version of Python are you using? (e.g., 3.9.7)
+ placeholder: "3.9.7"
+ validations:
+ required: true
+- type: input
+ attributes:
+ label: MindSpore Version
+ description: What version of MindSpore are you using? (e.g., 2.7.1)
+ placeholder: "2.7.1"
+ validations:
+ required: true
+- type: input
+ id: mindspore_nlp
+ attributes:
+ label: (Optional) MindSpore NLP Version
+ placeholder: e.g., 0.5.0
+ validations:
+ required: false
+- type: input
+ id: mindspore_transformers
+ attributes:
+ label: (Optional) MindSpore Transformers Version
+ placeholder: e.g., 1.7.0
+ validations:
+ required: false
+- type: textarea
+ id: other_suites
+ attributes:
+ label: (Optional) Other Toolkit or Suite Versions
+ description: |
+ Specify versions of any other MindSpore-related suites (e.g., MindSpore Lite, vLLM-MindSpore) or relevant third-party packages.
+ placeholder: |
+ e.g.,
+ - MindSpore Lite 2.7.0
+ - vLLM-MindSpore 0.4.0
+ validations:
+ required: false
+- type: textarea
+ attributes:
+ label: Additional Context
+ description: Any other details.
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..a5e2e21
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,39 @@
+name: 🚀 Feature request
+description: Suggest an idea for this project
+title: "[Feature] "
+
+body:
+- type: checkboxes
+ attributes:
+ label: Checklist
+ options:
+ - label: 1. I have read [Contributing Guidelines](https://github.com/xing-yiren/course_organization_template/wiki/%F0%9F%A7%AD-Contributing-Guidelines:-Issues-&-Pull-Requests).
+ - label: 2. If the issue you raised is not a feature but a question, please raise a discussion at [Discussions](https://github.com/xing-yiren/course_organization_template/discussions/new/choose). Otherwise, it will be closed.
+- type: textarea
+ attributes:
+ label: Motivation
+ description: |
+ A clear and concise description of the motivation of the feature.
+ validations:
+ required: true
+- type: checkboxes
+ id: hardware
+ attributes:
+ label: Hardware Environment
+ description: Which hardware does this feature need to support (select all that apply)?
+ options:
+ - label: Ascend
+ - label: GPU
+ - label: CPU
+ validations:
+ required: true
+- type: textarea
+ attributes:
+ label: Related resources
+ description: |
+ If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
+- type: textarea
+ attributes:
+ label: Your Contribution
+ description: |
+ Is there any way that you could help, e.g., by submitting a PR?
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..df2504b
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,34 @@
+
+
+# Description
+
+
+# Changes
+
+
+# Testing & Benchmark
+
+
+# Checklist
+
+- [ ] Read and followed the [Contributing Guidelines](wiki_link).
+- [ ] Self-tested locally to ensure the code runs correctly and achieves expected results (all CI checks expected to pass).
+- [ ] Updated documentation if needed.
+- [ ] Verified accuracy or performance benchmarks if applicable.
+
+# Reviewers
+
diff --git a/README.md b/README.md
index 583486b..ceadfbe 100644
--- a/README.md
+++ b/README.md
@@ -1,95 +1,91 @@
-# 昇思MindSpore技术公开课
+
-- ***探究前沿***:解读技术热点,解构热点模型
-- ***应用实践***:理论实践相结合,手把手指导开发
-- ***专家解读***:多领域专家,多元解读
-- ***开源共享***:课程免费,课件代码开源
-- ***大赛赋能***:ICT大赛赋能课程(大模型专题第一、二期)
-- ***系列课程***:大模型专题课程开展中,其他专题课程敬请期待
+Brief introduction to the repository.
-## 报名方式
+## 📢 News
-报名链接:https://xihe.mindspore.cn/course/foundation-model-v2/introduction
+- **2025-10-21 [Course Update]**: The XXX course has been updated with a new chapter on XXX, including complete videos, slides, and code examples. ([*View details*](xxx))
+- **2025-10-18 [Feature Optimization]**: Repository refactored for clearer course resource navigation; added CI pipeline for more standardized contributions. ([*View details*](xxx))
+- **2025-10-10 [Bug Fix]**: Fixed the xxx issue — thanks to @username for the PR contribution. ([View details](pr_link))
-(注:参与免费课程必须报名哦!同步添加[QQ群](./assets/groupchat_qq.png),后续课程事宜将在群内通知!)
+## Prerequisites
-## 大模型专题第一期(已完结)&第二期(进行中)
+Before starting this course, you should be familiar with:
-第二期课程10月14日起每双周六14:00-15:00在[b站](https://live.bilibili.com/22127570?broadcast_type=0&is_room_feed=1&spm_id_from=333.999.to_liveroom.0.click&live_from=86002)进行直播。
+- Basic Python programming
+- Basic Linux commands
+- Using Jupyter Notebook
+- Using Docker images
-每节课程的ppt和代码会随授课逐步上传至[github](https://github.com/mindspore-courses/step_into_llm),系列视频回放归档至[b站](https://space.bilibili.com/526894060/channel/seriesdetail?sid=3293489),大家可以在[昇思MindSpore公众号](./assets/wechat_official_account.png)中获取每节课的知识点回顾与下节课的课程预告,同时欢迎大家在[MindSpore社区](https://gitee.com/mindspore/community/issues)领取大模型系列任务进行挑战。
+You can take the [Prerequisite Test](exam_link) to assess your readiness.
-> 因为课程周期较长,课节安排可能会在中途出现微调,以最终通知为准,感谢理解!
+## Environment Setup
-> 热烈欢迎小伙伴们参与到课程的建设中来,基于课程的趣味开发可以提交至[昇思MindSpore大模型平台](https://xihe.mindspore.cn/)
+To ensure all example code runs smoothly, set up your environment using one of the following methods. For details, see [Set Up Development Environment](wiki_link) in Wiki.
-> 如果在学习过程中发现任何课件及代码方面的问题,希望我们讲解哪方面的内容,或是对课程有什么建议,都可以直接在本仓库中创建issue
+### Install Dependencies
+Confirm your Python version meets the course requirements, then run:
-### 教研团队
+```bash
+pip install -r requirements.txt
+```
-
+### Use Docker Image
-### 课前学习
+Prebuilt Dockerfiles are provided to simplify environment setup.
-- python
-- 人工智能基础、深度学习基础(重点学习自然语言处理):[MindSpore-d2l](https://openi.pcl.ac.cn/mindspore-courses/d2l-mindspore)
-- OpenI启智社区基础使用(可免费获取算力):[OpenI_Learning](https://openi.pcl.ac.cn/zeizei/OpenI_Learning)
-- MindSpore基础使用:[MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/r2.2/index.html)
-- MindFormers基础使用:[MindFormers讲解视频](https://www.bilibili.com/video/BV1jh4y1m7xV/?spm_id_from=333.999.0.0)
+You can find all course images in the [dockerfile](./dockerfile/) directory and pull the one that fits your hardware:
+| Platform | Image Name | Tag | Description | Dockerfile |
+| :------ | :-------------- | :------- | :------------------------ | :------------- |
+| CPU | xxx | xxx | xxx | xxx |
+| GPU | xxx | xxx | xxx | xxx |
+| NPU | xxx | xxx | xxx | xxx |
+For details, see [Using Docker Images](wiki_link) in Wiki.
-### 课程介绍
+## Course Content
-昇思MindSpore技术公开课火热开展中,面向所有对大模型感兴趣的开发者,带领大家理论结合时间,由浅入深地逐步深入大模型技术
+| No. | Lesson | Description | Learning Resource | Certification |
+| :-- | :------ | :--------------- | :----------------------- | :---------- |
+| 1 | xxx | xxx | [Slides](link) · [Code](link) · [Video](link) · [Cloud Lab](link) · [Learning Path](link) | |
+| 2 | xxx | xxx | [Slides](link) · [Code](link) · [Video](link) · [Cloud Lab](link) · [Learning Path](link) | [Beginner Certification](link) |
+| 3 | xxx | xxx | [Slides](link) · [Code](link) · [Video](link) · [Cloud Lab](link) · [Learning Path](link) | |
+| 4 | xxx | xxx | [Slides](link) · [Code](link) · [Video](link) · [Cloud Lab](link) · [Learning Path](link) | [Intermediate Certification](link) |
-在已经完结的第一期课程(第1讲-第10讲)中,我们从Transformer开始,解析到ChatGPT的演进路线,手把手带领大家搭建一个简易版的“ChatGPT”
+*“Cloud Lab” = interactive sandbox with prebuilt environment & resources.*
-正在进行的第二期课程(第11讲-)在第一期的基础上做了全方位的升级,围绕大模型从开发到应用的全流程实践展开,讲解更前沿的大模型知识、丰富更多元的讲师阵容,期待你的加入!
+## Version Management
-| 章节序号 | 章节名称 | 课程简介 | 视频 | 课件及代码 | 知识点总结 |
-|:----:|:----:|:--------------------------------------------|:----:|:----:|:----:|
-| 第一讲 | Transformer | Multi-head self-attention原理。Masked self-attention的掩码处理方式。基于Transformer的机器翻译任务训练。 | [link](https://www.bilibili.com/video/BV16h4y1W7us/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f4290) | [link](./Season1.step_into_chatgpt/1.Transformer/) | [link](./Season1.step_into_chatgpt/0.Course-Review/1-Transformer.md) |
-| 第二讲 | BERT | 基于Transformer Encoder的BERT模型设计:MLM和NSP任务。BERT进行下游任务微调的范式。 | [link](https://www.bilibili.com/video/BV1xs4y1M72q/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/2.BERT/) | [link](./Season1.step_into_chatgpt/0.Course-Review/2-BERT.md) |
-| 第三讲 | GPT | 基于Transformer Decoder的GPT模型设计:Next token prediction。GPT下游任务微调范式。 | [link](https://www.bilibili.com/video/BV1Gh411w7HC/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/3.GPT/) | [link](./Season1.step_into_chatgpt/0.Course-Review/3-GPT.md) |
-| 第四讲 | GPT2 | GPT2的核心创新点,包括Task Conditioning和Zero shot learning;模型实现细节基于GPT1的改动。 | [link](https://www.bilibili.com/video/BV1Ja4y1u7xx/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/4.GPT2/) | [link](./Season1.step_into_chatgpt/0.Course-Review/4-GPT2.md) |
-| 第五讲 | MindSpore自动并行 | 以MindSpore分布式并行特性为依托的数据并行、模型并行、Pipeline并行、内存优化等技术。 | [link](https://www.bilibili.com/video/BV1VN41117AG/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/5.Parallel/) | [link](./Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md) |
-| 第六讲 | 代码预训练 | 代码预训练发展沿革。Code数据的预处理。CodeGeex代码预训练大模型。 | [link](https://www.bilibili.com/video/BV1Em4y147a1/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/6.CodeGeeX/) | [link](./Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md) |
-| 第七讲 | Prompt Tuning | Pretrain-finetune范式到Prompt tuning范式的改变。Hard prompt和Soft prompt相关技术。只需要改造描述文本的prompting。 | [link](https://www.bilibili.com/video/BV1Wg4y1K77R/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/7.Prompt/) | [link](./Season1.step_into_chatgpt/0.Course-Review/7-Prompt.md) |
-| 第八讲 | 多模态预训练大模型 | 紫东太初多模态大模型的设计、数据处理和优势;语音识别的理论概述、系统框架和现状及挑战。 | [link](https://www.bilibili.com/video/BV1wg4y1K72r/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | / |
-| 第九讲 | Instruct Tuning | Instruction tuning的核心思想:让模型能够理解任务描述(指令)。Instruction tuning的局限性:无法支持开放域创新性任务、无法对齐LM训练目标和人类需求。Chain-of-thoughts:通过在prompt中提供示例,让模型“举一反三”。 | [link](https://www.bilibili.com/video/BV1cm4y1e7Cc/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/8.Instruction/) | [link](./Season1.step_into_chatgpt/0.Course-Review/8-Instruction.md) |
-| 第十讲 | RLHF | RLHF核心思想:将LLM和人类行为对齐。RLHF技术分解:LLM微调、基于人类反馈训练奖励模型、通过强化学习PPO算法实现模型微调。 | [link](https://www.bilibili.com/video/BV15a4y1c7dv/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/9.RLHF/) | 更新中 |
-| 第十一讲 | ChatGLM | GLM模型结构,从GLM到ChatGLM的演变,ChatGLM推理部署代码演示| [link](https://www.bilibili.com/video/BV1ju411T74Y/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) |[link](./Season2.step_into_llm/01.ChatGLM/)|[link](https://mp.weixin.qq.com/s/ZUoga1poFj49QPE3UNwE_w)|
-| 第十二讲 | 多模态遥感智能解译基础模型 | 本次课程由中国科学院空天信息创新研究院研究员 实验室副主任 孙显老师讲解多模态遥感解译基础模型,揭秘大模型时代的智能遥感技术的发展与挑战、遥感基础模型的技术路线与典型场景应用| [link](https://www.bilibili.com/video/BV1Be41197wY/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | /| [link](https://mp.weixin.qq.com/s/gx4KxpSfqDooIKvS8sN2fA)|
-| 第十三讲 | ChatGLM2 | ChatGLM2技术解析,ChatGLM2推理部署代码演示,ChatGLM3特性介绍| [link](https://www.bilibili.com/video/BV1Ew411W72E/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season2.step_into_llm/02.ChatGLM2/) |[link](https://mp.weixin.qq.com/s/Mu29b7E4TxtJBkONOJQdEA)|
-| 第十四讲 | 文本生成解码原理 | 以MindNLP为例,讲解搜索与采样技术原理和实现| [link](https://www.bilibili.com/video/BV1QN4y117ZK/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season2.step_into_llm/03.Decoding/) |[link](https://mp.weixin.qq.com/s/1WpiKb_1hPck_0EDnThmtA)|
-| 第十五讲 | LLAMA | LLaMA背景及羊驼大家族介绍,LLaMA模型结构解析,LLaMA推理部署代码演示| [link](https://www.bilibili.com/video/BV1nN41157a9/?spm_id_from=333.999.0.0) | [link](./Season2.step_into_llm/04.LLaMA/) | [link](https://mp.weixin.qq.com/s/9QdP062-agcIbsR0_a-b3g) |
-| 第十六讲 | LLAMA2 | 介绍LLAMA2模型结构,走读代码演示LLAMA2 chat部署| [link](https://www.bilibili.com/video/BV1Me411z7ZV/?spm_id_from=333.999.0.0) | [link](./Season2.step_into_llm/05.LLaMA2/) | [link](https://mp.weixin.qq.com/s/kmuMocA2oPJQNTXAjBKZ9A) |
-| 第十七讲 | 鹏城脑海 | 鹏城·脑海200B模型是具有2千亿参数的自回归式语言模型,在中国算力网枢纽节点'鹏城云脑II'千卡集群上基于昇思MindSpore的多维分布式并行技术进行长期大规模训练。模型聚焦中文核心能力,兼顾英文和部分多语言能力,目前完成了1.8T token量的训练 | [link](https://www.bilibili.com/video/BV1AT4y1p7bJ/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/BVzOzP_EEV3b-CNnqiRNXA) |
-| 第十八讲 | CPM-Bee | 介绍CPM-Bee预训练、推理、微调及代码现场演示 |[link](https://www.bilibili.com/video/BV1VZ4y1n7t9/?spm_id_from=333.999.0.0) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/07.CPM) | [link](https://mp.weixin.qq.com/s/lalEtEzUTQRqS1M-6AEVow) |
-| 第十九讲 | RWKV1-4 | RNN的没落和Transformers的崛起 万能的Transformers?Self-attention的弊端 “拳打”Transformer的新RNN-RWKV 基于MindNLP的RWKV模型实践 | [link](https://www.bilibili.com/video/BV1K4421w7Ha/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/n9uxjENUA-XQEXXO3BJiPA) |
-| 第二十讲 | MOE | MoE的前世今生 MoE的实现基础:AlltoAll通信; Mixtral 8x7b: 当前最好的开源MoE大模型,MoE与终身学习,基于昇思MindSpore的Mixtral 8x7b推理演示。 | [link](https://www.bilibili.com/video/BV1jH4y177DL/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/08.MoE) | [link](https://mp.weixin.qq.com/s/QubiOzpEau6dqMgFAVhxog) |
-| 第二十一讲 | 高效参数微调 | 介绍Lora、(P-Tuning)原理及代码实现 | [link](https://www.bilibili.com/video/BV11D421j7fZ/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/09.PEFT) | [link](https://mp.weixin.qq.com/s/EAge4XZEG8vsyAvQFXZrhA) |
-| 第二十二讲 |Prompt Engineering | Prompt engineering:1.什么是Prompt?2.如何定义一个Prompt的好坏或优异? 3.如何撰写优质的Prompt?4.如何产出一个优质的Prompt? 5.浅谈一些我们在进行Prompt的时候遇到的问题。 | [link](https://www.bilibili.com/video/BV1aD421W73q/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / |[link](https://mp.weixin.qq.com/s/CTVpcpKZA3E6oZftwpdgEA) |
-| 第二十三讲 | 多维度混合并行自动搜索优化策略 | 议题一·时间损失模型及改进多维度二分法/议题二·APSS算法应用 | [上](https://www.bilibili.com/video/BV1if421X7jB/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) [下](https://www.bilibili.com/video/BV1QM4m1z7FV/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://mp.weixin.qq.com/s/8OufiPX4FLbgj8ztnckcWQ) |
-|第二十四讲 | 书生.浦语大模型开源全链工具链简介与智能体开发体验| 在本期课程中,我们有幸邀请到了书生.浦语社区技术运营、技术布道师闻星老师,以及昇思MindSpore技术布道师耿力老师,来详细解读书生.浦语大模型开源全链路工具链,演示如何对书生.浦语进行微调、推理以及智能体开发实操。| [link](https://www.bilibili.com/video/BV1K4421w7Ha/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/uh_RIThOEzkkWVbK_RBALQ) |
-| 第二十五讲 | RAG | | | | |
-| 第二十六讲 | LangChain模块解析 | 解析Models、Prompts、Memory、Chains、Agents、Indexes、Callbacks模块,及案例分析 | | | |
-| 第二十七讲 | RWKV5-6 | / | | | |
-| 第二十八讲 | 量化 | 介绍低比特量化等相关模型量化技术| | | |
+This repository is updated in sync with **MindSpore** and the **MindSpore NLP** Suite.
+New releases of this repository are published approximately **every six months**.
+| Branch/Version | Python | MindSpore | MindSpore NLP |
+| :------ | :----- |:------ |:------ |
+| master | xxx | xxx | xxx |
+| r1.0 | xxx | xxx | xxx |
-### 昇思资源一览:生态与伙伴共建、共享、共荣
+## FAQ
-
+See the [FAQ](wiki_link) in the Wiki.
-### 加入我们
+## Contributing
-
-
-  |
-  |
-  |
-
+We welcome bug reports, suggestions, and code contributions via [Issues](Issue_link) or [PRs](PR_link). Please follow our submission guidelines — all PRs are reviewed and merged by @username. Your contributions make the project stronger!
+
+**Guidelines**: [Issue & PR Submission](WIKI_link)
+
+## Contributors
+
+Special thanks to all contributors for improving this project!
+
+
diff --git a/README_BAK.md b/README_BAK.md
new file mode 100644
index 0000000..583486b
--- /dev/null
+++ b/README_BAK.md
@@ -0,0 +1,95 @@
+# 昇思MindSpore技术公开课
+
+- ***探究前沿***:解读技术热点,解构热点模型
+- ***应用实践***:理论实践相结合,手把手指导开发
+- ***专家解读***:多领域专家,多元解读
+- ***开源共享***:课程免费,课件代码开源
+- ***大赛赋能***:ICT大赛赋能课程(大模型专题第一、二期)
+- ***系列课程***:大模型专题课程开展中,其他专题课程敬请期待
+
+## 报名方式
+
+报名链接:https://xihe.mindspore.cn/course/foundation-model-v2/introduction
+
+(注:参与免费课程必须报名哦!同步添加[QQ群](./assets/groupchat_qq.png),后续课程事宜将在群内通知!)
+
+## 大模型专题第一期(已完结)&第二期(进行中)
+
+第二期课程10月14日起每双周六14:00-15:00在[b站](https://live.bilibili.com/22127570?broadcast_type=0&is_room_feed=1&spm_id_from=333.999.to_liveroom.0.click&live_from=86002)进行直播。
+
+每节课程的ppt和代码会随授课逐步上传至[github](https://github.com/mindspore-courses/step_into_llm),系列视频回放归档至[b站](https://space.bilibili.com/526894060/channel/seriesdetail?sid=3293489),大家可以在[昇思MindSpore公众号](./assets/wechat_official_account.png)中获取每节课的知识点回顾与下节课的课程预告,同时欢迎大家在[MindSpore社区](https://gitee.com/mindspore/community/issues)领取大模型系列任务进行挑战。
+
+> 因为课程周期较长,课节安排可能会在中途出现微调,以最终通知为准,感谢理解!
+
+> 热烈欢迎小伙伴们参与到课程的建设中来,基于课程的趣味开发可以提交至[昇思MindSpore大模型平台](https://xihe.mindspore.cn/)
+
+> 如果在学习过程中发现任何课件及代码方面的问题,希望我们讲解哪方面的内容,或是对课程有什么建议,都可以直接在本仓库中创建issue
+
+
+### 教研团队
+
+
+
+### 课前学习
+
+- python
+- 人工智能基础、深度学习基础(重点学习自然语言处理):[MindSpore-d2l](https://openi.pcl.ac.cn/mindspore-courses/d2l-mindspore)
+- OpenI启智社区基础使用(可免费获取算力):[OpenI_Learning](https://openi.pcl.ac.cn/zeizei/OpenI_Learning)
+- MindSpore基础使用:[MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/r2.2/index.html)
+- MindFormers基础使用:[MindFormers讲解视频](https://www.bilibili.com/video/BV1jh4y1m7xV/?spm_id_from=333.999.0.0)
+
+
+
+### 课程介绍
+
+昇思MindSpore技术公开课火热开展中,面向所有对大模型感兴趣的开发者,带领大家理论结合时间,由浅入深地逐步深入大模型技术
+
+在已经完结的第一期课程(第1讲-第10讲)中,我们从Transformer开始,解析到ChatGPT的演进路线,手把手带领大家搭建一个简易版的“ChatGPT”
+
+正在进行的第二期课程(第11讲-)在第一期的基础上做了全方位的升级,围绕大模型从开发到应用的全流程实践展开,讲解更前沿的大模型知识、丰富更多元的讲师阵容,期待你的加入!
+
+| 章节序号 | 章节名称 | 课程简介 | 视频 | 课件及代码 | 知识点总结 |
+|:----:|:----:|:--------------------------------------------|:----:|:----:|:----:|
+| 第一讲 | Transformer | Multi-head self-attention原理。Masked self-attention的掩码处理方式。基于Transformer的机器翻译任务训练。 | [link](https://www.bilibili.com/video/BV16h4y1W7us/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f4290) | [link](./Season1.step_into_chatgpt/1.Transformer/) | [link](./Season1.step_into_chatgpt/0.Course-Review/1-Transformer.md) |
+| 第二讲 | BERT | 基于Transformer Encoder的BERT模型设计:MLM和NSP任务。BERT进行下游任务微调的范式。 | [link](https://www.bilibili.com/video/BV1xs4y1M72q/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/2.BERT/) | [link](./Season1.step_into_chatgpt/0.Course-Review/2-BERT.md) |
+| 第三讲 | GPT | 基于Transformer Decoder的GPT模型设计:Next token prediction。GPT下游任务微调范式。 | [link](https://www.bilibili.com/video/BV1Gh411w7HC/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/3.GPT/) | [link](./Season1.step_into_chatgpt/0.Course-Review/3-GPT.md) |
+| 第四讲 | GPT2 | GPT2的核心创新点,包括Task Conditioning和Zero shot learning;模型实现细节基于GPT1的改动。 | [link](https://www.bilibili.com/video/BV1Ja4y1u7xx/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/4.GPT2/) | [link](./Season1.step_into_chatgpt/0.Course-Review/4-GPT2.md) |
+| 第五讲 | MindSpore自动并行 | 以MindSpore分布式并行特性为依托的数据并行、模型并行、Pipeline并行、内存优化等技术。 | [link](https://www.bilibili.com/video/BV1VN41117AG/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/5.Parallel/) | [link](./Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md) |
+| 第六讲 | 代码预训练 | 代码预训练发展沿革。Code数据的预处理。CodeGeex代码预训练大模型。 | [link](https://www.bilibili.com/video/BV1Em4y147a1/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/6.CodeGeeX/) | [link](./Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md) |
+| 第七讲 | Prompt Tuning | Pretrain-finetune范式到Prompt tuning范式的改变。Hard prompt和Soft prompt相关技术。只需要改造描述文本的prompting。 | [link](https://www.bilibili.com/video/BV1Wg4y1K77R/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/7.Prompt/) | [link](./Season1.step_into_chatgpt/0.Course-Review/7-Prompt.md) |
+| 第八讲 | 多模态预训练大模型 | 紫东太初多模态大模型的设计、数据处理和优势;语音识别的理论概述、系统框架和现状及挑战。 | [link](https://www.bilibili.com/video/BV1wg4y1K72r/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | / |
+| 第九讲 | Instruct Tuning | Instruction tuning的核心思想:让模型能够理解任务描述(指令)。Instruction tuning的局限性:无法支持开放域创新性任务、无法对齐LM训练目标和人类需求。Chain-of-thoughts:通过在prompt中提供示例,让模型“举一反三”。 | [link](https://www.bilibili.com/video/BV1cm4y1e7Cc/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/8.Instruction/) | [link](./Season1.step_into_chatgpt/0.Course-Review/8-Instruction.md) |
+| 第十讲 | RLHF | RLHF核心思想:将LLM和人类行为对齐。RLHF技术分解:LLM微调、基于人类反馈训练奖励模型、通过强化学习PPO算法实现模型微调。 | [link](https://www.bilibili.com/video/BV15a4y1c7dv/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/9.RLHF/) | 更新中 |
+| 第十一讲 | ChatGLM | GLM模型结构,从GLM到ChatGLM的演变,ChatGLM推理部署代码演示| [link](https://www.bilibili.com/video/BV1ju411T74Y/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) |[link](./Season2.step_into_llm/01.ChatGLM/)|[link](https://mp.weixin.qq.com/s/ZUoga1poFj49QPE3UNwE_w)|
+| 第十二讲 | 多模态遥感智能解译基础模型 | 本次课程由中国科学院空天信息创新研究院研究员 实验室副主任 孙显老师讲解多模态遥感解译基础模型,揭秘大模型时代的智能遥感技术的发展与挑战、遥感基础模型的技术路线与典型场景应用| [link](https://www.bilibili.com/video/BV1Be41197wY/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | /| [link](https://mp.weixin.qq.com/s/gx4KxpSfqDooIKvS8sN2fA)|
+| 第十三讲 | ChatGLM2 | ChatGLM2技术解析,ChatGLM2推理部署代码演示,ChatGLM3特性介绍| [link](https://www.bilibili.com/video/BV1Ew411W72E/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season2.step_into_llm/02.ChatGLM2/) |[link](https://mp.weixin.qq.com/s/Mu29b7E4TxtJBkONOJQdEA)|
+| 第十四讲 | 文本生成解码原理 | 以MindNLP为例,讲解搜索与采样技术原理和实现| [link](https://www.bilibili.com/video/BV1QN4y117ZK/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season2.step_into_llm/03.Decoding/) |[link](https://mp.weixin.qq.com/s/1WpiKb_1hPck_0EDnThmtA)|
+| 第十五讲 | LLAMA | LLaMA背景及羊驼大家族介绍,LLaMA模型结构解析,LLaMA推理部署代码演示| [link](https://www.bilibili.com/video/BV1nN41157a9/?spm_id_from=333.999.0.0) | [link](./Season2.step_into_llm/04.LLaMA/) | [link](https://mp.weixin.qq.com/s/9QdP062-agcIbsR0_a-b3g) |
+| 第十六讲 | LLAMA2 | 介绍LLAMA2模型结构,走读代码演示LLAMA2 chat部署| [link](https://www.bilibili.com/video/BV1Me411z7ZV/?spm_id_from=333.999.0.0) | [link](./Season2.step_into_llm/05.LLaMA2/) | [link](https://mp.weixin.qq.com/s/kmuMocA2oPJQNTXAjBKZ9A) |
+| 第十七讲 | 鹏城脑海 | 鹏城·脑海200B模型是具有2千亿参数的自回归式语言模型,在中国算力网枢纽节点'鹏城云脑II'千卡集群上基于昇思MindSpore的多维分布式并行技术进行长期大规模训练。模型聚焦中文核心能力,兼顾英文和部分多语言能力,目前完成了1.8T token量的训练 | [link](https://www.bilibili.com/video/BV1AT4y1p7bJ/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/BVzOzP_EEV3b-CNnqiRNXA) |
+| 第十八讲 | CPM-Bee | 介绍CPM-Bee预训练、推理、微调及代码现场演示 |[link](https://www.bilibili.com/video/BV1VZ4y1n7t9/?spm_id_from=333.999.0.0) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/07.CPM) | [link](https://mp.weixin.qq.com/s/lalEtEzUTQRqS1M-6AEVow) |
+| 第十九讲 | RWKV1-4 | RNN的没落和Transformers的崛起 万能的Transformers?Self-attention的弊端 “拳打”Transformer的新RNN-RWKV 基于MindNLP的RWKV模型实践 | [link](https://www.bilibili.com/video/BV1K4421w7Ha/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/n9uxjENUA-XQEXXO3BJiPA) |
+| 第二十讲 | MOE | MoE的前世今生 MoE的实现基础:AlltoAll通信; Mixtral 8x7b: 当前最好的开源MoE大模型,MoE与终身学习,基于昇思MindSpore的Mixtral 8x7b推理演示。 | [link](https://www.bilibili.com/video/BV1jH4y177DL/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/08.MoE) | [link](https://mp.weixin.qq.com/s/QubiOzpEau6dqMgFAVhxog) |
+| 第二十一讲 | 高效参数微调 | 介绍Lora、(P-Tuning)原理及代码实现 | [link](https://www.bilibili.com/video/BV11D421j7fZ/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/09.PEFT) | [link](https://mp.weixin.qq.com/s/EAge4XZEG8vsyAvQFXZrhA) |
+| 第二十二讲 |Prompt Engineering | Prompt engineering:1.什么是Prompt?2.如何定义一个Prompt的好坏或优异? 3.如何撰写优质的Prompt?4.如何产出一个优质的Prompt? 5.浅谈一些我们在进行Prompt的时候遇到的问题。 | [link](https://www.bilibili.com/video/BV1aD421W73q/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / |[link](https://mp.weixin.qq.com/s/CTVpcpKZA3E6oZftwpdgEA) |
+| 第二十三讲 | 多维度混合并行自动搜索优化策略 | 议题一·时间损失模型及改进多维度二分法/议题二·APSS算法应用 | [上](https://www.bilibili.com/video/BV1if421X7jB/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) [下](https://www.bilibili.com/video/BV1QM4m1z7FV/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://mp.weixin.qq.com/s/8OufiPX4FLbgj8ztnckcWQ) |
+|第二十四讲 | 书生.浦语大模型开源全链工具链简介与智能体开发体验| 在本期课程中,我们有幸邀请到了书生.浦语社区技术运营、技术布道师闻星老师,以及昇思MindSpore技术布道师耿力老师,来详细解读书生.浦语大模型开源全链路工具链,演示如何对书生.浦语进行微调、推理以及智能体开发实操。| [link](https://www.bilibili.com/video/BV1K4421w7Ha/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/uh_RIThOEzkkWVbK_RBALQ) |
+| 第二十五讲 | RAG | | | | |
+| 第二十六讲 | LangChain模块解析 | 解析Models、Prompts、Memory、Chains、Agents、Indexes、Callbacks模块,及案例分析 | | | |
+| 第二十七讲 | RWKV5-6 | / | | | |
+| 第二十八讲 | 量化 | 介绍低比特量化等相关模型量化技术| | | |
+
+
+
+### 昇思资源一览:生态与伙伴共建、共享、共荣
+
+
+
+### 加入我们
+
+
+
+  |
+  |
+  |
+
diff --git a/README_ZH.md b/README_ZH.md
new file mode 100644
index 0000000..f90beab
--- /dev/null
+++ b/README_ZH.md
@@ -0,0 +1,87 @@
+
+
+(1-2句话点名项目核心价值)项目仓介绍。
+
+## 📢 最新消息
+
+- 2025-10-21 「课程更新」:新增XXX课程,包含完整视频、课件及代码案例。([查看详情](xxxx))
+- 2025-10-18 「功能优化」:项目仓完成重构,查找课程资源更清晰,新增PR检查门禁,合入内容更规范。([查看详情](xxx))
+- 2025-10-10 「Bug修复」:修复xxxxxx问题,感谢@username的PR贡献。([查看详情](xxxx))
+
+## 前置知识
+
+在学习本门课程之前,您需要掌握:
+
+- Python基础
+- Linux命令基础
+- Jupyter基础
+- Docker镜像使用
+
+您可以通过[前置学习考试](考试链接)进行自检。
+
+## 环境准备
+
+为确保项目仓中实践代码可正常运行,推荐以下环境准备方式。更详细的环境准备指导详见[Wiki](wiki链接)。
+
+### 直接安装依赖
+
+请先确保 Python 版本符合[课程要求](#版本维护)后,进入仓库根目录,执行:
+
+```bash
+pip install -r requirements.txt
+```
+
+### 使用Docker镜像
+
+为方便开发者更加便捷地进行代码实践,节约环境准备的时间,我们提供了预装好的基础Dockerfile文件。课程的所有镜像可从[dockerfile](./dockerfile/)获取。本课程镜像文件信息如下,开发者可根据实际需求进行拉取:
+
+| 硬件平台 | 镜像名称 | 标签 | 说明 | Dockerfile文件 |
+| :------ | :-------------- | :------- | :------------------------ | :------------- |
+| CPU | xxx | xxx | xxx | xxx |
+| GPU | xxx | xxx | xxx | xxx |
+| NPU | xxx | xxx | xxx | xxx |
+
+镜像基础使用教程详见环境准备Wiki中的[Docker镜像使用](./dockerfile/README.md)部分。
+
+## 课程内容
+
+| 序号 | 课节 | 简介 | 课程资源 | 能力认证入口 |
+| :-- | :------ | :--------------- | :----------------------- | :---------- |
+| 1 | xxx | xxx | [PPT](跳转链接) · [代码](跳转链接) · [视频](跳转链接) · [云沙箱实验](跳转链接) · [学习路径](跳转链接) | |
+| 2 | xxx | xxx | [PPT](跳转链接) · [代码](跳转链接) · [视频](跳转链接) · [云沙箱实验](跳转链接) · [学习路径](跳转链接) | [初级认证入口](xxxx) |
+| 3 | xxx | xxx | [PPT](跳转链接) · [代码](跳转链接) · [视频](跳转链接) · [云沙箱实验](跳转链接) · [学习路径](跳转链接) | |
+| 4 | xxx | xxx | [PPT](跳转链接) · [代码](跳转链接) · [视频](跳转链接) · [云沙箱实验](跳转链接) · [学习路径](跳转链接) | [中级认证入口](xxxx) |
+
+## 版本维护
+
+项目随昇思MindSpore及昇思MindSpore NLP套件迭代同步发布版本,本项目仓每**半年**进行版本发布。
+
+| 版本名 | Python | MindSpore | MindSpore NLP |
+| :----- | :----- |:------ |:------ |
+| master | xxx | xxx | xxx |
+| r1.0 | xxx | xxx | xxx |
+
+## 常见问题(FAQ)
+
+详见Wiki中[FAQ](wiki链接)。
+
+## 贡献与反馈
+
+欢迎各位开发者通过 [Issue](Issue链接) 提交建议或 bug 反馈,也可直接发起 [PR](PR链接) 进行Bug修复或代码贡献(提交前请参考提交规范,由Committer @username 完成评审合入),你的每一份参与都能让本项目更加完善。
+
+### 提交规范
+
+详见WIKI:[Issue与PR提交规范](WIKI链接)
+
+### 贡献者展示
+
+向本项目的贡献者们致以最诚挚的感谢!
+
+
diff --git a/Season1.step_into_chatgpt/0.Course-Review/3-GPT.md b/Season1.step_into_chatgpt/0.Course-Review/3-GPT.md
index 8237000..5840162 100644
--- a/Season1.step_into_chatgpt/0.Course-Review/3-GPT.md
+++ b/Season1.step_into_chatgpt/0.Course-Review/3-GPT.md
@@ -9,7 +9,7 @@ GPT-1是更早于BERT提出了预训练语言模型(Pre-train+Fine-tune)的
## 1. 课程回顾
- Semi-Supervised Learning
-- Unsupervised Pretraining
+- Unsupervised Pretraining
- 模型预训练优化目标
- 模型结构
- Supervised Fine-tuning
diff --git a/Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md b/Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md
index a6898e6..61e4f0f 100644
--- a/Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md
+++ b/Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md
@@ -50,13 +50,13 @@
- 内存优化
1. 重计算
-
+

时间换空间:重计算技术可以不保存正向计算结果,让该内存可以被复用,然后在计算反向部分时,重新计算出正向结果。
-
+
2. 优化器并行——ZeRO
-
+

将参数和梯度分组放到不同卡上更新,再通过通信广播操作在设备间共享更新后的权值。
diff --git a/Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md b/Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md
index a4b8883..2d5fce9 100644
--- a/Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md
+++ b/Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md
@@ -70,11 +70,11 @@
2. 目前的基准从多任务及多语言两个方面对模型进行评价
- 多任务
-
+
通过不同应用场景进行评价,多使用CodeBLEU/BLEU评价相似性
- 多语言
-
+
在不同编程语言下评价代码正确性,如HumanEval(仅支持Python)、MultiPL-E(支持16种语言,但为自动翻译并不支持多任务)
3. HumanEval-X:新的多语言代码生成基准
diff --git a/Season1.step_into_chatgpt/1.Transformer/transformer-new.ipynb b/Season1.step_into_chatgpt/1.Transformer/transformer-new.ipynb
index a9bb5e4..419de78 100644
--- a/Season1.step_into_chatgpt/1.Transformer/transformer-new.ipynb
+++ b/Season1.step_into_chatgpt/1.Transformer/transformer-new.ipynb
@@ -302,7 +302,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
@@ -353,14 +353,14 @@
"\n",
" embed_size = query.shape[-1]\n",
" scaling_factor = self.sqrt(Tensor(embed_size, mstype.float32))\n",
- " \n",
+ "\n",
"\n",
" attn = ops.matmul(query, key.swapaxes(-2, -1) / scaling_factor)\n",
"\n",
"\n",
" if attn_mask is not None:\n",
" attn = attn.masked_fill(attn_mask, -1e9)\n",
- " \n",
+ "\n",
" attn = self.softmax(attn)\n",
"\n",
" attn = self.dropout(attn)\n",
@@ -868,7 +868,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "fragment"
@@ -881,7 +881,7 @@
" super().__init__()\n",
" self.layer_norm = nn.LayerNorm((d_model, ), epsilon=1e-5)\n",
" self.dropout = nn.Dropout(p=dropout_p)\n",
- " \n",
+ "\n",
" def construct(self, x, residual):\n",
" return self.layer_norm(self.dropout(x) + residual)"
]
@@ -926,7 +926,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "fragment"
@@ -944,7 +944,7 @@
" self.pos_ffn = PoswiseFeedForward(d_ff, d_model, dropout_p)\n",
" self.add_norm1 = AddNorm(d_model, dropout_p)\n",
" self.add_norm2 = AddNorm(d_model, dropout_p)\n",
- " \n",
+ "\n",
" def construct(self, enc_inputs, enc_self_attn_mask):\n",
" \"\"\"\n",
" enc_inputs: [batch_size, src_len, d_model]\n",
@@ -1004,7 +1004,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "fragment"
@@ -1020,7 +1020,7 @@
" self.layers = nn.CellList([EncoderLayer(d_model, n_heads, d_ff, dropout_p) for _ in range(n_layers)])\n",
" self.scaling_factor = ops.Sqrt()(Tensor(d_model, mstype.float32))\n",
"\n",
- " \n",
+ "\n",
" def construct(self, enc_inputs, src_pad_idx):\n",
" \"\"\"enc_inputs : [batch_size, src_len]\n",
" \"\"\"\n",
@@ -1098,7 +1098,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
@@ -1108,7 +1108,7 @@
"source": [
"def get_attn_subsequent_mask(seq_q, seq_k):\n",
" \"\"\"生成时间掩码,使decoder在第t时刻只能看到序列的前t-1个元素\n",
- " \n",
+ "\n",
" Args:\n",
" seq_q (Tensor): query序列,shape = [batch size, len_q]\n",
" seq_k (Tensor): key序列,shape = [batch size, len_k]\n",
@@ -1162,7 +1162,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "fragment"
@@ -1182,7 +1182,7 @@
" self.add_norm1 = AddNorm(d_model, dropout_p)\n",
" self.add_norm2 = AddNorm(d_model, dropout_p)\n",
" self.add_norm3 = AddNorm(d_model, dropout_p)\n",
- " \n",
+ "\n",
" def construct(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\n",
" \"\"\"\n",
" dec_inputs: [batch_size, trg_len, d_model]\n",
@@ -1196,7 +1196,7 @@
"\n",
" dec_outputs = self.add_norm1(dec_outputs, residual)\n",
" residual = dec_outputs\n",
- " \n",
+ "\n",
" dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\n",
"\n",
" dec_outputs = self.add_norm2(dec_outputs, residual)\n",
@@ -1251,7 +1251,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "fragment"
@@ -1266,8 +1266,8 @@
" self.pos_emb = PositionalEncoding(d_model, dropout_p)\n",
" self.layers = nn.CellList([DecoderLayer(d_model, n_heads, d_ff) for _ in range(n_layers)])\n",
" self.projection = nn.Dense(d_model, trg_vocab_size)\n",
- " self.scaling_factor = ops.Sqrt()(Tensor(d_model, mstype.float32)) \n",
- " \n",
+ " self.scaling_factor = ops.Sqrt()(Tensor(d_model, mstype.float32))\n",
+ "\n",
" def construct(self, dec_inputs, enc_inputs, enc_outputs, src_pad_idx, trg_pad_idx):\n",
" \"\"\"\n",
" dec_inputs: [batch_size, trg_len]\n",
@@ -1308,7 +1308,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "fragment"
@@ -1321,7 +1321,7 @@
" super().__init__()\n",
" self.encoder = encoder\n",
" self.decoder = decoder\n",
- " \n",
+ "\n",
" def construct(self, enc_inputs, dec_inputs, src_pad_idx, trg_pad_idx):\n",
" \"\"\"\n",
" enc_inputs: [batch_size, src_len]\n",
@@ -1333,8 +1333,7 @@
"\n",
" dec_logits = dec_outputs.view((-1, dec_outputs.shape[-1]))\n",
"\n",
- " return dec_logits, enc_self_attns, dec_self_attns, dec_enc_attns\n",
- " "
+ " return dec_logits, enc_self_attns, dec_self_attns, dec_enc_attns\n"
]
},
{
@@ -1489,7 +1488,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
@@ -1501,18 +1500,18 @@
"\n",
"class Multi30K():\n",
" \"\"\"Multi30K数据集加载器\n",
- " \n",
+ "\n",
" 加载Multi30K数据集并处理为一个Python迭代对象。\n",
- " \n",
+ "\n",
" \"\"\"\n",
" def __init__(self, path):\n",
" self.data = self._load(path)\n",
- " \n",
+ "\n",
" def _load(self, path):\n",
" def tokenize(text):\n",
" text = text.rstrip()\n",
" return [tok.lower() for tok in re.findall(r'\\w+|[^\\w\\s]', text)]\n",
- " \n",
+ "\n",
" members = {i.split('.')[-1]: i for i in os.listdir(path)}\n",
" de_path = os.path.join(path, members['de'])\n",
" en_path = os.path.join(path, members['en'])\n",
@@ -1524,10 +1523,10 @@
" en = [tokenize(i) for i in en]\n",
"\n",
" return list(zip(de, en))\n",
- " \n",
+ "\n",
" def __getitem__(self, idx):\n",
" return self.data[idx]\n",
- " \n",
+ "\n",
" def __len__(self):\n",
" return len(self.data)"
]
@@ -2378,7 +2377,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
@@ -2391,13 +2390,13 @@
" enc_inputs: [batch_size(1), src_len]\n",
" \"\"\"\n",
" new_model.set_train(False)\n",
- " \n",
+ "\n",
" # 对输入句子进行分词\n",
" if isinstance(sentence, str):\n",
" tokens = [tok.lower() for tok in re.findall(r'\\w+|[^\\w\\s]', sentence.rstrip())]\n",
" else:\n",
" tokens = [token.lower() for token in sentence]\n",
- " \n",
+ "\n",
" # 补充起始、终止占位符,统一序列长度\n",
" if len(tokens) > max_len - 2:\n",
" src_len = max_len\n",
@@ -2405,24 +2404,24 @@
" else:\n",
" src_len = len(tokens) + 2\n",
" tokens = [''] + tokens + [''] + [''] * (max_len - src_len)\n",
- " \n",
+ "\n",
" # 将德语单词转换为数字索引,并进一步转换为tensor\n",
" # enc_inputs: [1, src_len]\n",
" indexes = de_vocab.encode(tokens)\n",
" enc_inputs = Tensor(indexes, mstype.float32).expand_dims(0)\n",
- " \n",
+ "\n",
" # 将输入送入encoder,获取信息\n",
" enc_outputs, _ = new_model.encoder(enc_inputs, src_pad_idx)\n",
"\n",
" dec_inputs = Tensor([[en_vocab.bos_idx]], mstype.float32)\n",
- " \n",
+ "\n",
" # 初始化decoder输入,此时仅有句首占位符\n",
" # dec_inputs: [1, 1]\n",
" max_len = enc_inputs.shape[1]\n",
" for _ in range(max_len):\n",
" dec_outputs, _, _ = new_model.decoder(dec_inputs, enc_inputs, enc_outputs, src_pad_idx, trg_pad_idx)\n",
" dec_logits = dec_outputs.view((-1, dec_outputs.shape[-1]))\n",
- " \n",
+ "\n",
" # 找到下一个词的概率分布,并输出预测\n",
" dec_logits = dec_logits[-1, :]\n",
" pred = dec_logits.argmax(axis=0).expand_dims(0).expand_dims(0)\n",
@@ -2511,7 +2510,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
@@ -2532,16 +2531,16 @@
"def calculate_bleu(dataset, max_len=50):\n",
" trgs = []\n",
" pred_trgs = []\n",
- " \n",
+ "\n",
" for data in dataset[:10]:\n",
- " \n",
+ "\n",
" src = data[0]\n",
" trg = data[1]\n",
"\n",
" pred_trg = inference(src, max_len)\n",
" pred_trgs.append(pred_trg)\n",
" trgs.append([trg])\n",
- " \n",
+ "\n",
" return corpus_bleu(trgs, pred_trgs)\n",
"\n",
"bleu_score = calculate_bleu(test_dataset)\n",
diff --git a/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb b/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb
index 6ceed46..db40ebd 100644
--- a/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb
+++ b/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb
@@ -290,7 +290,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -302,7 +302,7 @@
" is_ascend = mindspore.get_context('device_target') == 'Ascend'\n",
"\n",
" column_names = [\"label\", \"text_a\"]\n",
- " \n",
+ "\n",
" dataset = GeneratorDataset(source, column_names=column_names, shuffle=shuffle)\n",
" # transforms\n",
" type_cast_op = transforms.TypeCast(mindspore.int32)\n",
@@ -569,7 +569,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -580,12 +580,12 @@
"def compute_metrics(eval_pred):\n",
" predictions = eval_pred.predictions\n",
" labels = eval_pred.label_ids\n",
- " \n",
+ "\n",
" if len(predictions.shape) > 1:\n",
" predictions = np.argmax(predictions, axis=-1)\n",
"\n",
" accuracy = (predictions == labels).mean()\n",
- " \n",
+ "\n",
" return {\"accuracy\": float(accuracy)}"
]
},
diff --git a/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb b/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb
index 99ba599..d2d875d 100644
--- a/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb
+++ b/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb
@@ -422,7 +422,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "d04340e2",
"metadata": {
"slideshow": {
@@ -456,7 +456,7 @@
" position_ids = position_ids.expand_dims(0).expand_as(input_ids)\n",
" if token_type_ids is None:\n",
" token_type_ids = ops.zeros_like(input_ids)\n",
- " \n",
+ "\n",
" words_embeddings = self.word_embeddings(input_ids)\n",
" position_embeddings = self.position_embeddings(position_ids)\n",
" token_type_embeddings = self.token_type_embeddings(token_type_ids)\n",
@@ -860,7 +860,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "53d4e932",
"metadata": {
"slideshow": {
@@ -897,7 +897,7 @@
" self.dense = nn.Dense(config.hidden_size, config.hidden_size, weight_init=TruncatedNormal(config.initializer_range))\n",
" self.transform_act_fn = activation_map.get(config.hidden_act, nn.GELU(False))\n",
" self.layer_norm = nn.LayerNorm((config.hidden_size,), epsilon=config.layer_norm_eps)\n",
- " \n",
+ "\n",
" def construct(self, hidden_states):\n",
" hidden_states = self.dense(hidden_states)\n",
" hidden_states = self.transform_act_fn(hidden_states)\n",
@@ -991,7 +991,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "e1784651",
"metadata": {
"slideshow": {
@@ -1004,7 +1004,7 @@
" def __init__(self, config):\n",
" super(BertPooler, self).__init__()\n",
" self.dense = nn.Dense(config.hidden_size, config.hidden_size, activation='tanh', weight_init=TruncatedNormal(config.initializer_range))\n",
- " \n",
+ "\n",
" def construct(self, hidden_states):\n",
" first_token_tensor = hidden_states[:, 0]\n",
" pooled_output = self.dense(first_token_tensor)\n",
@@ -1027,7 +1027,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"id": "edc1f2a8",
"metadata": {
"slideshow": {
@@ -1041,7 +1041,7 @@
" super(BertPreTrainingHeads, self).__init__()\n",
" self.predictions = BertLMPredictionHead(config)\n",
" self.seq_relationship = nn.Dense(config.hidden_size, 2, weight_init=TruncatedNormal(config.initializer_range))\n",
- " \n",
+ "\n",
" def construct(self, sequence_output, pooled_output, masked_lm_positions):\n",
" prediction_scores = self.predictions(sequence_output, masked_lm_positions)\n",
" seq_relationship_score = self.seq_relationship(pooled_output)\n",
diff --git a/Season1.step_into_chatgpt/3.GPT/gpt_imdb_finetune.ipynb b/Season1.step_into_chatgpt/3.GPT/gpt_imdb_finetune.ipynb
index 66ecb2d..41856e7 100644
--- a/Season1.step_into_chatgpt/3.GPT/gpt_imdb_finetune.ipynb
+++ b/Season1.step_into_chatgpt/3.GPT/gpt_imdb_finetune.ipynb
@@ -589,7 +589,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -616,7 +616,7 @@
"\n",
" acc = compute_accuracy(logits, label)['accuracy']\n",
" epoch_acc += acc\n",
- " \n",
+ "\n",
" step_total += 1\n",
" acc=epoch_acc/step_total\n",
"\n",
diff --git a/Season1.step_into_chatgpt/4.GPT2/gpt2_modules.ipynb b/Season1.step_into_chatgpt/4.GPT2/gpt2_modules.ipynb
index 9aa4b00..dc0bcb2 100644
--- a/Season1.step_into_chatgpt/4.GPT2/gpt2_modules.ipynb
+++ b/Season1.step_into_chatgpt/4.GPT2/gpt2_modules.ipynb
@@ -181,7 +181,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "2d315a4e-5663-404e-b93d-efb1cf354414",
"metadata": {
"tags": []
@@ -212,7 +212,7 @@
"from mindnlp.transformers.ms_utils import Conv1D\n",
"\n",
"# query = Wq * X, key = Wk * X, value = Wv * X\n",
- "# c_attn: (1, 10, 768*3) --> query, key, value: (1, 10, 768), (1, 10, 768), (1, 10, 768) \n",
+ "# c_attn: (1, 10, 768*3) --> query, key, value: (1, 10, 768), (1, 10, 768), (1, 10, 768)\n",
"c_attn = Conv1D(3 * embed_dim, embed_dim)\n",
"query, key, value = split(c_attn(x), embed_dim, axis=2)\n",
"query.shape, key.shape, value.shape"
@@ -230,7 +230,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "abb7ccac-7cfe-401a-ab32-763de70b4669",
"metadata": {
"tags": []
@@ -245,7 +245,7 @@
" new_shape = tensor.shape[:-1] + (num_heads, attn_head_size)\n",
" tensor = tensor.view(new_shape)\n",
" # (batch_size, seq_len, num_heads, attn_head_size) --> (batch_size, num_heads, seq_len, attn_head_size)\n",
- " return ops.transpose(tensor, (0, 2, 1, 3)) "
+ " return ops.transpose(tensor, (0, 2, 1, 3))"
]
},
{
diff --git a/Season1.step_into_chatgpt/4.GPT2/gpt2_summarization.ipynb b/Season1.step_into_chatgpt/4.GPT2/gpt2_summarization.ipynb
index 7daef40..2a5a842 100644
--- a/Season1.step_into_chatgpt/4.GPT2/gpt2_summarization.ipynb
+++ b/Season1.step_into_chatgpt/4.GPT2/gpt2_summarization.ipynb
@@ -320,7 +320,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "f1ee1961-0658-4e70-95c2-81fefd83a40b",
"metadata": {},
"outputs": [],
@@ -340,7 +340,7 @@
" tokenized = tokenizer(text=article, text_pair=summary,\n",
" padding='max_length', truncation='only_first', max_length=max_seq_len)\n",
" return tokenized['input_ids'], tokenized['input_ids']\n",
- " \n",
+ "\n",
" dataset = dataset.map(read_map, 'text', ['article', 'summary'])\n",
" # change column names to input_ids and labels for the following training\n",
" dataset = dataset.map(merge_and_pad, ['article', 'summary'], ['input_ids', 'labels'])\n",
@@ -608,7 +608,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"id": "88259c93-5366-4406-a417-396808ec767c",
"metadata": {},
"outputs": [],
@@ -624,7 +624,7 @@
" learning_rate=learning_rate,\n",
" max_grad_norm=max_grad_norm,\n",
" warmup_steps=warmup_steps\n",
- " \n",
+ "\n",
")\n",
"\n",
"from mindnlp.engine import Trainer\n",
@@ -696,7 +696,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "787795ec-0c07-4be6-97b7-4defbe899117",
"metadata": {},
"outputs": [],
@@ -712,7 +712,7 @@
"\n",
" dataset = dataset.map(read_map, 'text', ['article', 'summary'])\n",
" dataset = dataset.map(pad, 'article', ['input_ids'])\n",
- " \n",
+ "\n",
" dataset = dataset.batch(batch_size)\n",
"\n",
" return dataset"