diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..94f480d --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..35798f4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,96 @@ +name: 🐞 Bug report +description: Create a report to help us reproduce and fix the bug +title: "[Bug] " +labels: ['bug'] + +body: +- type: checkboxes + attributes: + label: Checklist + options: + - label: 1. I have searched related issues but cannot get the expected help. + - label: 2. The bug has not been fixed in the latest version. + - label: 3. I have read [Contributing Guidelines](https://github.com/xing-yiren/course_organization_template/wiki/%F0%9F%A7%AD-Contributing-Guidelines:-Issues-&-Pull-Requests). + - label: 4. Please note that if the bug-related issue you submitted lacks corresponding environment info and a minimal reproducible demo, it will be challenging for us to reproduce and resolve the issue, reducing the likelihood of receiving feedback. + - label: 5. If the issue you raised is not a bug but a question, please raise a discussion at [Discussions](https://github.com/xing-yiren/course_organization_template/discussions/new/choose). Otherwise, it will be closed. +- type: textarea + attributes: + label: Describe the bug + description: | + A clear and concise description of what the bug is, including: + - What happened (actual behavior) + - What you expected (expected outcome) + - **Please attach error logs or screenshots if possible**—this helps us locate the issue faster. + validations: + required: true +- type: textarea + attributes: + label: Reproduction + description: | + What command or script did you run? Please list the exact steps to reproduce the bug. + placeholder: | + A placeholder for the command. 
+ validations: + required: true +- type: checkboxes + id: hardware + attributes: + label: Hardware Environment + description: Which hardware type does this bug relate to (select all that apply)? + options: + - label: Ascend + - label: GPU + - label: CPU + validations: + required: true +- type: input + attributes: + label: OS Platform + description: Which operating system are you using? (e.g., Ubuntu 20.04) + placeholder: "Ubuntu 20.04" + validations: + required: true +- type: input + attributes: + label: Python Version + description: What version of Python are you using? (e.g., 3.9.7) + placeholder: "3.9.7" + validations: + required: true +- type: input + attributes: + label: MindSpore Version + description: What version of MindSpore are you using? (e.g., 2.7.1) + placeholder: "2.7.1" + validations: + required: true +- type: input + id: mindspore_nlp + attributes: + label: (Optional) MindSpore NLP Version + placeholder: e.g., 0.5.0 + validations: + required: false +- type: input + id: mindspore_transformers + attributes: + label: (Optional) MindSpore Transformers Version + placeholder: e.g., 1.7.0 + validations: + required: false +- type: textarea + id: other_suites + attributes: + label: (Optional) Other Toolkit or Suite Versions + description: | + Specify versions of any other MindSpore-related suites (e.g., MindSpore Lite, vLLM-MindSpore) or relevant third-party packages. + placeholder: | + e.g., + - MindSpore Lite 2.7.0 + - vLLM-MindSpore 0.4.0 + validations: + required: false +- type: textarea + attributes: + label: Additional Context + description: Any other details. 
\ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..a5e2e21 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,39 @@ +name: 🚀 Feature request +description: Suggest an idea for this project +title: "[Feature] " + +body: +- type: checkboxes + attributes: + label: Checklist + options: + - label: 1. I have read [Contributing Guidlines](https://github.com/xing-yiren/course_organization_template/wiki/%F0%9F%A7%AD-Contributing-Guidelines:-Issues-&-Pull-Requests). + - label: 2. If the issue you raised is not a feature but a question, please raise a discussion at [Discussions](https://github.com/xing-yiren/course_organization_template/discussions/new/choose) Otherwise, it will be closed. +- type: textarea + attributes: + label: Motivation + description: | + A clear and concise description of the motivation of the feature. + validations: + required: true +- type: checkboxes + id: hardware + attributes: + label: Hardware Environment + description: Which hardware does this feature need to support (select all that apply)? + options: + - label: Ascend + - label: GPU + - label: CPU + validations: + required: true +- type: textarea + attributes: + label: Related resources + description: | + If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful. +- type: textarea + attributes: + label: Your Contribution + description: | + Is there any way that you could help, e.g., by submitting a PR? diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..df2504b --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,34 @@ + + +# Description + + +# Changes + + +# Testing & Benchmark + + +# Checklist + +- [ ] Read and followed the [Contributing Guidelines](wiki_link). 
+- [ ] Self-tested locally to ensure the code runs correctly and achieves expected results (all CI checks expected to pass). +- [ ] Updated documentation if needed. +- [ ] Verified accuracy or performance benchmarks if applicable. + +# Reviewers + diff --git a/README.md b/README.md index 583486b..ceadfbe 100644 --- a/README.md +++ b/README.md @@ -1,95 +1,91 @@ -# 昇思MindSpore技术公开课 +
+

Course Title

+

查看中文

+
-- ***探究前沿***:解读技术热点,解构热点模型 -- ***应用实践***:理论实践相结合,手把手指导开发 -- ***专家解读***:多领域专家,多元解读 -- ***开源共享***:课程免费,课件代码开源 -- ***大赛赋能***:ICT大赛赋能课程(大模型专题第一、二期) -- ***系列课程***:大模型专题课程开展中,其他专题课程敬请期待 +Brief introduction to repository. -## 报名方式 +## 📢 News -报名链接:https://xihe.mindspore.cn/course/foundation-model-v2/introduction +- **2025-10-21 [Course Update]**:The XXX course has been updated with a new chapter on XXX, including complete videos, slides, and code examples. ([*View details*](xxx)) +- **2025-10-18 [Feature Optimization]**:Repository refactored for clearer course resource navigation; added CI pipeline for more standardized contributions. ([*View details*](xxx)) +- **2025-10-10 [Bug Fix]**:Fixed the xxx issue — thanks to @username for the PR contribution. ([View details](pr_link)) -(注:参与免费课程必须报名哦!同步添加[QQ群](./assets/groupchat_qq.png),后续课程事宜将在群内通知!) +## Prerequisites -## 大模型专题第一期(已完结)&第二期(进行中) +Before starting this course, you should be familiar with: -第二期课程10月14日起每双周六14:00-15:00在[b站](https://live.bilibili.com/22127570?broadcast_type=0&is_room_feed=1&spm_id_from=333.999.to_liveroom.0.click&live_from=86002)进行直播。 +- Basic Python programming +- Basic Linux commands +- Using Jupyter Notebook +- Using Docker images -每节课程的ppt和代码会随授课逐步上传至[github](https://github.com/mindspore-courses/step_into_llm),系列视频回放归档至[b站](https://space.bilibili.com/526894060/channel/seriesdetail?sid=3293489),大家可以在[昇思MindSpore公众号](./assets/wechat_official_account.png)中获取每节课的知识点回顾与下节课的课程预告,同时欢迎大家在[MindSpore社区](https://gitee.com/mindspore/community/issues)领取大模型系列任务进行挑战。 +You can take the [Prerequisite Test](exam_link) to assess your readiness. -> 因为课程周期较长,课节安排可能会在中途出现微调,以最终通知为准,感谢理解! +## Environment Setup -> 热烈欢迎小伙伴们参与到课程的建设中来,基于课程的趣味开发可以提交至[昇思MindSpore大模型平台](https://xihe.mindspore.cn/) +To ensure all example code runs smoothly, set up your environment using one of the following methods. For details, see [Set Up Development Environment](wiki_link) in Wiki. 
-> 如果在学习过程中发现任何课件及代码方面的问题,希望我们讲解哪方面的内容,或是对课程有什么建议,都可以直接在本仓库中创建issue +### Install Dependencies +Confirm your Python version meets the course requirements, then run: -### 教研团队 +```bash +pip install -r requirements.txt +``` -
teachers
+### Use Docker Image -### 课前学习 +Prebuilt Dockerfiles are provided to simplify environment setup. -- python -- 人工智能基础、深度学习基础(重点学习自然语言处理):[MindSpore-d2l](https://openi.pcl.ac.cn/mindspore-courses/d2l-mindspore) -- OpenI启智社区基础使用(可免费获取算力):[OpenI_Learning](https://openi.pcl.ac.cn/zeizei/OpenI_Learning) -- MindSpore基础使用:[MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/r2.2/index.html) -- MindFormers基础使用:[MindFormers讲解视频](https://www.bilibili.com/video/BV1jh4y1m7xV/?spm_id_from=333.999.0.0) +You can find all course images in the [dockerfile](./dockerfile/) directory and pull the one that fits your hardware: +| Platform | Image Name | Tag | Description | Dockerfile | +| :------ | :-------------- | :------- | :------------------------ | :------------- | +| CPU | xxx | xxx | xxx | xxx | +| GPU | xxx | xxx | xxx | xxx | +| NPU | xxx | xxx | xxx | xxx | +For details, see [Using Docker Images](wiki_link) in Wiki. -### 课程介绍 +## Course Content -昇思MindSpore技术公开课火热开展中,面向所有对大模型感兴趣的开发者,带领大家理论结合时间,由浅入深地逐步深入大模型技术 +| No. | Lesson | Description | Learning Resource | Certification | +| :-- | :------ | :--------------- | :----------------------- | :---------- | +| 1 | xxx | xxx | [Slides](link) · [Code](link) · [Video](link) · [Cloud Lab](link) · [Learning Path](link) | | +| 2 | xxx | xxx | [Slides](link) · [Code](link) · [Video](link) · [Cloud Lab](link) · [Learning Path](link) | [Beginner Certification](link) | +| 3 | xxx | xxx | [Slides](link) · [Code](link) · [Video](link) · [Cloud Lab](link) · [Learning Path](link) | | +| 4 | xxx | xxx | [Slides](link) · [Code](link) · [Video](link) · [Cloud Lab](link) · [Learning Path](link) | [Intermediate Certification](link) | -在已经完结的第一期课程(第1讲-第10讲)中,我们从Transformer开始,解析到ChatGPT的演进路线,手把手带领大家搭建一个简易版的“ChatGPT” +*“Cloud Lab” = interactive sandbox with prebuilt environment & resources.* -正在进行的第二期课程(第11讲-)在第一期的基础上做了全方位的升级,围绕大模型从开发到应用的全流程实践展开,讲解更前沿的大模型知识、丰富更多元的讲师阵容,期待你的加入! 
+## Version Management -| 章节序号 | 章节名称 | 课程简介 | 视频 | 课件及代码 | 知识点总结 | -|:----:|:----:|:--------------------------------------------|:----:|:----:|:----:| -| 第一讲 | Transformer | Multi-head self-attention原理。Masked self-attention的掩码处理方式。基于Transformer的机器翻译任务训练。 | [link](https://www.bilibili.com/video/BV16h4y1W7us/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f4290) | [link](./Season1.step_into_chatgpt/1.Transformer/) | [link](./Season1.step_into_chatgpt/0.Course-Review/1-Transformer.md) | -| 第二讲 | BERT | 基于Transformer Encoder的BERT模型设计:MLM和NSP任务。BERT进行下游任务微调的范式。 | [link](https://www.bilibili.com/video/BV1xs4y1M72q/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/2.BERT/) | [link](./Season1.step_into_chatgpt/0.Course-Review/2-BERT.md) | -| 第三讲 | GPT | 基于Transformer Decoder的GPT模型设计:Next token prediction。GPT下游任务微调范式。 | [link](https://www.bilibili.com/video/BV1Gh411w7HC/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/3.GPT/) | [link](./Season1.step_into_chatgpt/0.Course-Review/3-GPT.md) | -| 第四讲 | GPT2 | GPT2的核心创新点,包括Task Conditioning和Zero shot learning;模型实现细节基于GPT1的改动。 | [link](https://www.bilibili.com/video/BV1Ja4y1u7xx/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/4.GPT2/) | [link](./Season1.step_into_chatgpt/0.Course-Review/4-GPT2.md) | -| 第五讲 | MindSpore自动并行 | 以MindSpore分布式并行特性为依托的数据并行、模型并行、Pipeline并行、内存优化等技术。 | [link](https://www.bilibili.com/video/BV1VN41117AG/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/5.Parallel/) | [link](./Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md) | -| 第六讲 | 代码预训练 | 代码预训练发展沿革。Code数据的预处理。CodeGeex代码预训练大模型。 | [link](https://www.bilibili.com/video/BV1Em4y147a1/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/6.CodeGeeX/) | 
[link](./Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md) | -| 第七讲 | Prompt Tuning | Pretrain-finetune范式到Prompt tuning范式的改变。Hard prompt和Soft prompt相关技术。只需要改造描述文本的prompting。 | [link](https://www.bilibili.com/video/BV1Wg4y1K77R/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/7.Prompt/) | [link](./Season1.step_into_chatgpt/0.Course-Review/7-Prompt.md) | -| 第八讲 | 多模态预训练大模型 | 紫东太初多模态大模型的设计、数据处理和优势;语音识别的理论概述、系统框架和现状及挑战。 | [link](https://www.bilibili.com/video/BV1wg4y1K72r/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | / | -| 第九讲 | Instruct Tuning | Instruction tuning的核心思想:让模型能够理解任务描述(指令)。Instruction tuning的局限性:无法支持开放域创新性任务、无法对齐LM训练目标和人类需求。Chain-of-thoughts:通过在prompt中提供示例,让模型“举一反三”。 | [link](https://www.bilibili.com/video/BV1cm4y1e7Cc/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/8.Instruction/) | [link](./Season1.step_into_chatgpt/0.Course-Review/8-Instruction.md) | -| 第十讲 | RLHF | RLHF核心思想:将LLM和人类行为对齐。RLHF技术分解:LLM微调、基于人类反馈训练奖励模型、通过强化学习PPO算法实现模型微调。 | [link](https://www.bilibili.com/video/BV15a4y1c7dv/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/9.RLHF/) | 更新中 | -| 第十一讲 | ChatGLM | GLM模型结构,从GLM到ChatGLM的演变,ChatGLM推理部署代码演示| [link](https://www.bilibili.com/video/BV1ju411T74Y/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) |[link](./Season2.step_into_llm/01.ChatGLM/)|[link](https://mp.weixin.qq.com/s/ZUoga1poFj49QPE3UNwE_w)| -| 第十二讲 | 多模态遥感智能解译基础模型 | 本次课程由中国科学院空天信息创新研究院研究员 实验室副主任 孙显老师讲解多模态遥感解译基础模型,揭秘大模型时代的智能遥感技术的发展与挑战、遥感基础模型的技术路线与典型场景应用| [link](https://www.bilibili.com/video/BV1Be41197wY/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | /| [link](https://mp.weixin.qq.com/s/gx4KxpSfqDooIKvS8sN2fA)| -| 第十三讲 | ChatGLM2 | ChatGLM2技术解析,ChatGLM2推理部署代码演示,ChatGLM3特性介绍| 
[link](https://www.bilibili.com/video/BV1Ew411W72E/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season2.step_into_llm/02.ChatGLM2/) |[link](https://mp.weixin.qq.com/s/Mu29b7E4TxtJBkONOJQdEA)| -| 第十四讲 | 文本生成解码原理 | 以MindNLP为例,讲解搜索与采样技术原理和实现| [link](https://www.bilibili.com/video/BV1QN4y117ZK/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season2.step_into_llm/03.Decoding/) |[link](https://mp.weixin.qq.com/s/1WpiKb_1hPck_0EDnThmtA)| -| 第十五讲 | LLAMA | LLaMA背景及羊驼大家族介绍,LLaMA模型结构解析,LLaMA推理部署代码演示| [link](https://www.bilibili.com/video/BV1nN41157a9/?spm_id_from=333.999.0.0) | [link](./Season2.step_into_llm/04.LLaMA/) | [link](https://mp.weixin.qq.com/s/9QdP062-agcIbsR0_a-b3g) | -| 第十六讲 | LLAMA2 | 介绍LLAMA2模型结构,走读代码演示LLAMA2 chat部署| [link](https://www.bilibili.com/video/BV1Me411z7ZV/?spm_id_from=333.999.0.0) | [link](./Season2.step_into_llm/05.LLaMA2/) | [link](https://mp.weixin.qq.com/s/kmuMocA2oPJQNTXAjBKZ9A) | -| 第十七讲 | 鹏城脑海 | 鹏城·脑海200B模型是具有2千亿参数的自回归式语言模型,在中国算力网枢纽节点'鹏城云脑II'千卡集群上基于昇思MindSpore的多维分布式并行技术进行长期大规模训练。模型聚焦中文核心能力,兼顾英文和部分多语言能力,目前完成了1.8T token量的训练 | [link](https://www.bilibili.com/video/BV1AT4y1p7bJ/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/BVzOzP_EEV3b-CNnqiRNXA) | -| 第十八讲 | CPM-Bee | 介绍CPM-Bee预训练、推理、微调及代码现场演示 |[link](https://www.bilibili.com/video/BV1VZ4y1n7t9/?spm_id_from=333.999.0.0) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/07.CPM) | [link](https://mp.weixin.qq.com/s/lalEtEzUTQRqS1M-6AEVow) | -| 第十九讲 | RWKV1-4 | RNN的没落和Transformers的崛起 万能的Transformers?Self-attention的弊端 “拳打”Transformer的新RNN-RWKV 基于MindNLP的RWKV模型实践 | [link](https://www.bilibili.com/video/BV1K4421w7Ha/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/n9uxjENUA-XQEXXO3BJiPA) | -| 第二十讲 | MOE | MoE的前世今生 MoE的实现基础:AlltoAll通信; Mixtral 8x7b: 
当前最好的开源MoE大模型,MoE与终身学习,基于昇思MindSpore的Mixtral 8x7b推理演示。 | [link](https://www.bilibili.com/video/BV1jH4y177DL/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/08.MoE) | [link](https://mp.weixin.qq.com/s/QubiOzpEau6dqMgFAVhxog) | -| 第二十一讲 | 高效参数微调 | 介绍Lora、(P-Tuning)原理及代码实现 | [link](https://www.bilibili.com/video/BV11D421j7fZ/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/09.PEFT) | [link](https://mp.weixin.qq.com/s/EAge4XZEG8vsyAvQFXZrhA) | -| 第二十二讲 |Prompt Engineering | Prompt engineering:1.什么是Prompt?2.如何定义一个Prompt的好坏或优异? 3.如何撰写优质的Prompt?4.如何产出一个优质的Prompt? 5.浅谈一些我们在进行Prompt的时候遇到的问题。 | [link](https://www.bilibili.com/video/BV1aD421W73q/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / |[link](https://mp.weixin.qq.com/s/CTVpcpKZA3E6oZftwpdgEA) | -| 第二十三讲 | 多维度混合并行自动搜索优化策略 | 议题一·时间损失模型及改进多维度二分法/议题二·APSS算法应用 | [上](https://www.bilibili.com/video/BV1if421X7jB/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) [下](https://www.bilibili.com/video/BV1QM4m1z7FV/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://mp.weixin.qq.com/s/8OufiPX4FLbgj8ztnckcWQ) | -|第二十四讲 | 书生.浦语大模型开源全链工具链简介与智能体开发体验| 在本期课程中,我们有幸邀请到了书生.浦语社区技术运营、技术布道师闻星老师,以及昇思MindSpore技术布道师耿力老师,来详细解读书生.浦语大模型开源全链路工具链,演示如何对书生.浦语进行微调、推理以及智能体开发实操。| [link](https://www.bilibili.com/video/BV1K4421w7Ha/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/uh_RIThOEzkkWVbK_RBALQ) | -| 第二十五讲 | RAG | | | | | -| 第二十六讲 | LangChain模块解析 | 解析Models、Prompts、Memory、Chains、Agents、Indexes、Callbacks模块,及案例分析 | | | | -| 第二十七讲 | RWKV5-6 | / | | | | -| 第二十八讲 | 量化 | 介绍低比特量化等相关模型量化技术| | | | +This repository is updated in sync with **MindSpore** and the **MindSpore NLP** Suite. 
+New releases of this repository are published approximately **every six months**. +| Branch/Version | Python | MindSpore | MindSpore NLP | +| :------ | :----- |:------ |:------ | +| master | xxx | xxx | xxx | +| r1.0 | xxx | xxx | xxx | -### 昇思资源一览:生态与伙伴共建、共享、共荣 +## FAQ -
resources
+See the [FAQ](wiki_link) in the Wiki. -### 加入我们 +## Contributing - - - - - - +We welcome bug reports, suggestions, and code contributions via [Issues](Issue_link) or [PRs](PR_link). Please follow our submission guidelines — all PRs are reviewed and merged by @username. Your contributions make the project stronger! + +**Guidelines**: [Issue & PR Submission](WIKI_link) + +## Contributors + +Special thanks to all contributors for improving this project! + +
+ + + +
diff --git a/README_BAK.md b/README_BAK.md new file mode 100644 index 0000000..583486b --- /dev/null +++ b/README_BAK.md @@ -0,0 +1,95 @@ +# 昇思MindSpore技术公开课 + +- ***探究前沿***:解读技术热点,解构热点模型 +- ***应用实践***:理论实践相结合,手把手指导开发 +- ***专家解读***:多领域专家,多元解读 +- ***开源共享***:课程免费,课件代码开源 +- ***大赛赋能***:ICT大赛赋能课程(大模型专题第一、二期) +- ***系列课程***:大模型专题课程开展中,其他专题课程敬请期待 + +## 报名方式 + +报名链接:https://xihe.mindspore.cn/course/foundation-model-v2/introduction + +(注:参与免费课程必须报名哦!同步添加[QQ群](./assets/groupchat_qq.png),后续课程事宜将在群内通知!) + +## 大模型专题第一期(已完结)&第二期(进行中) + +第二期课程10月14日起每双周六14:00-15:00在[b站](https://live.bilibili.com/22127570?broadcast_type=0&is_room_feed=1&spm_id_from=333.999.to_liveroom.0.click&live_from=86002)进行直播。 + +每节课程的ppt和代码会随授课逐步上传至[github](https://github.com/mindspore-courses/step_into_llm),系列视频回放归档至[b站](https://space.bilibili.com/526894060/channel/seriesdetail?sid=3293489),大家可以在[昇思MindSpore公众号](./assets/wechat_official_account.png)中获取每节课的知识点回顾与下节课的课程预告,同时欢迎大家在[MindSpore社区](https://gitee.com/mindspore/community/issues)领取大模型系列任务进行挑战。 + +> 因为课程周期较长,课节安排可能会在中途出现微调,以最终通知为准,感谢理解! + +> 热烈欢迎小伙伴们参与到课程的建设中来,基于课程的趣味开发可以提交至[昇思MindSpore大模型平台](https://xihe.mindspore.cn/) + +> 如果在学习过程中发现任何课件及代码方面的问题,希望我们讲解哪方面的内容,或是对课程有什么建议,都可以直接在本仓库中创建issue + + +### 教研团队 + +
teachers
+ +### 课前学习 + +- python +- 人工智能基础、深度学习基础(重点学习自然语言处理):[MindSpore-d2l](https://openi.pcl.ac.cn/mindspore-courses/d2l-mindspore) +- OpenI启智社区基础使用(可免费获取算力):[OpenI_Learning](https://openi.pcl.ac.cn/zeizei/OpenI_Learning) +- MindSpore基础使用:[MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/r2.2/index.html) +- MindFormers基础使用:[MindFormers讲解视频](https://www.bilibili.com/video/BV1jh4y1m7xV/?spm_id_from=333.999.0.0) + + + +### 课程介绍 + +昇思MindSpore技术公开课火热开展中,面向所有对大模型感兴趣的开发者,带领大家理论结合时间,由浅入深地逐步深入大模型技术 + +在已经完结的第一期课程(第1讲-第10讲)中,我们从Transformer开始,解析到ChatGPT的演进路线,手把手带领大家搭建一个简易版的“ChatGPT” + +正在进行的第二期课程(第11讲-)在第一期的基础上做了全方位的升级,围绕大模型从开发到应用的全流程实践展开,讲解更前沿的大模型知识、丰富更多元的讲师阵容,期待你的加入! + +| 章节序号 | 章节名称 | 课程简介 | 视频 | 课件及代码 | 知识点总结 | +|:----:|:----:|:--------------------------------------------|:----:|:----:|:----:| +| 第一讲 | Transformer | Multi-head self-attention原理。Masked self-attention的掩码处理方式。基于Transformer的机器翻译任务训练。 | [link](https://www.bilibili.com/video/BV16h4y1W7us/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f4290) | [link](./Season1.step_into_chatgpt/1.Transformer/) | [link](./Season1.step_into_chatgpt/0.Course-Review/1-Transformer.md) | +| 第二讲 | BERT | 基于Transformer Encoder的BERT模型设计:MLM和NSP任务。BERT进行下游任务微调的范式。 | [link](https://www.bilibili.com/video/BV1xs4y1M72q/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/2.BERT/) | [link](./Season1.step_into_chatgpt/0.Course-Review/2-BERT.md) | +| 第三讲 | GPT | 基于Transformer Decoder的GPT模型设计:Next token prediction。GPT下游任务微调范式。 | [link](https://www.bilibili.com/video/BV1Gh411w7HC/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/3.GPT/) | [link](./Season1.step_into_chatgpt/0.Course-Review/3-GPT.md) | +| 第四讲 | GPT2 | GPT2的核心创新点,包括Task Conditioning和Zero shot learning;模型实现细节基于GPT1的改动。 | [link](https://www.bilibili.com/video/BV1Ja4y1u7xx/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | 
[link](./Season1.step_into_chatgpt/4.GPT2/) | [link](./Season1.step_into_chatgpt/0.Course-Review/4-GPT2.md) | +| 第五讲 | MindSpore自动并行 | 以MindSpore分布式并行特性为依托的数据并行、模型并行、Pipeline并行、内存优化等技术。 | [link](https://www.bilibili.com/video/BV1VN41117AG/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/5.Parallel/) | [link](./Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md) | +| 第六讲 | 代码预训练 | 代码预训练发展沿革。Code数据的预处理。CodeGeex代码预训练大模型。 | [link](https://www.bilibili.com/video/BV1Em4y147a1/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/6.CodeGeeX/) | [link](./Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md) | +| 第七讲 | Prompt Tuning | Pretrain-finetune范式到Prompt tuning范式的改变。Hard prompt和Soft prompt相关技术。只需要改造描述文本的prompting。 | [link](https://www.bilibili.com/video/BV1Wg4y1K77R/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/7.Prompt/) | [link](./Season1.step_into_chatgpt/0.Course-Review/7-Prompt.md) | +| 第八讲 | 多模态预训练大模型 | 紫东太初多模态大模型的设计、数据处理和优势;语音识别的理论概述、系统框架和现状及挑战。 | [link](https://www.bilibili.com/video/BV1wg4y1K72r/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | / | +| 第九讲 | Instruct Tuning | Instruction tuning的核心思想:让模型能够理解任务描述(指令)。Instruction tuning的局限性:无法支持开放域创新性任务、无法对齐LM训练目标和人类需求。Chain-of-thoughts:通过在prompt中提供示例,让模型“举一反三”。 | [link](https://www.bilibili.com/video/BV1cm4y1e7Cc/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/8.Instruction/) | [link](./Season1.step_into_chatgpt/0.Course-Review/8-Instruction.md) | +| 第十讲 | RLHF | RLHF核心思想:将LLM和人类行为对齐。RLHF技术分解:LLM微调、基于人类反馈训练奖励模型、通过强化学习PPO算法实现模型微调。 | [link](https://www.bilibili.com/video/BV15a4y1c7dv/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season1.step_into_chatgpt/9.RLHF/) | 更新中 | +| 第十一讲 | ChatGLM | 
GLM模型结构,从GLM到ChatGLM的演变,ChatGLM推理部署代码演示| [link](https://www.bilibili.com/video/BV1ju411T74Y/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) |[link](./Season2.step_into_llm/01.ChatGLM/)|[link](https://mp.weixin.qq.com/s/ZUoga1poFj49QPE3UNwE_w)| +| 第十二讲 | 多模态遥感智能解译基础模型 | 本次课程由中国科学院空天信息创新研究院研究员 实验室副主任 孙显老师讲解多模态遥感解译基础模型,揭秘大模型时代的智能遥感技术的发展与挑战、遥感基础模型的技术路线与典型场景应用| [link](https://www.bilibili.com/video/BV1Be41197wY/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | /| [link](https://mp.weixin.qq.com/s/gx4KxpSfqDooIKvS8sN2fA)| +| 第十三讲 | ChatGLM2 | ChatGLM2技术解析,ChatGLM2推理部署代码演示,ChatGLM3特性介绍| [link](https://www.bilibili.com/video/BV1Ew411W72E/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season2.step_into_llm/02.ChatGLM2/) |[link](https://mp.weixin.qq.com/s/Mu29b7E4TxtJBkONOJQdEA)| +| 第十四讲 | 文本生成解码原理 | 以MindNLP为例,讲解搜索与采样技术原理和实现| [link](https://www.bilibili.com/video/BV1QN4y117ZK/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](./Season2.step_into_llm/03.Decoding/) |[link](https://mp.weixin.qq.com/s/1WpiKb_1hPck_0EDnThmtA)| +| 第十五讲 | LLAMA | LLaMA背景及羊驼大家族介绍,LLaMA模型结构解析,LLaMA推理部署代码演示| [link](https://www.bilibili.com/video/BV1nN41157a9/?spm_id_from=333.999.0.0) | [link](./Season2.step_into_llm/04.LLaMA/) | [link](https://mp.weixin.qq.com/s/9QdP062-agcIbsR0_a-b3g) | +| 第十六讲 | LLAMA2 | 介绍LLAMA2模型结构,走读代码演示LLAMA2 chat部署| [link](https://www.bilibili.com/video/BV1Me411z7ZV/?spm_id_from=333.999.0.0) | [link](./Season2.step_into_llm/05.LLaMA2/) | [link](https://mp.weixin.qq.com/s/kmuMocA2oPJQNTXAjBKZ9A) | +| 第十七讲 | 鹏城脑海 | 鹏城·脑海200B模型是具有2千亿参数的自回归式语言模型,在中国算力网枢纽节点'鹏城云脑II'千卡集群上基于昇思MindSpore的多维分布式并行技术进行长期大规模训练。模型聚焦中文核心能力,兼顾英文和部分多语言能力,目前完成了1.8T token量的训练 | [link](https://www.bilibili.com/video/BV1AT4y1p7bJ/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/BVzOzP_EEV3b-CNnqiRNXA) | +| 第十八讲 | CPM-Bee | 介绍CPM-Bee预训练、推理、微调及代码现场演示 
|[link](https://www.bilibili.com/video/BV1VZ4y1n7t9/?spm_id_from=333.999.0.0) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/07.CPM) | [link](https://mp.weixin.qq.com/s/lalEtEzUTQRqS1M-6AEVow) | +| 第十九讲 | RWKV1-4 | RNN的没落和Transformers的崛起 万能的Transformers?Self-attention的弊端 “拳打”Transformer的新RNN-RWKV 基于MindNLP的RWKV模型实践 | [link](https://www.bilibili.com/video/BV1K4421w7Ha/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/n9uxjENUA-XQEXXO3BJiPA) | +| 第二十讲 | MOE | MoE的前世今生 MoE的实现基础:AlltoAll通信; Mixtral 8x7b: 当前最好的开源MoE大模型,MoE与终身学习,基于昇思MindSpore的Mixtral 8x7b推理演示。 | [link](https://www.bilibili.com/video/BV1jH4y177DL/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/08.MoE) | [link](https://mp.weixin.qq.com/s/QubiOzpEau6dqMgFAVhxog) | +| 第二十一讲 | 高效参数微调 | 介绍Lora、(P-Tuning)原理及代码实现 | [link](https://www.bilibili.com/video/BV11D421j7fZ/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://github.com/mindspore-courses/step_into_llm/tree/master/Season2.step_into_llm/09.PEFT) | [link](https://mp.weixin.qq.com/s/EAge4XZEG8vsyAvQFXZrhA) | +| 第二十二讲 |Prompt Engineering | Prompt engineering:1.什么是Prompt?2.如何定义一个Prompt的好坏或优异? 3.如何撰写优质的Prompt?4.如何产出一个优质的Prompt? 
5.浅谈一些我们在进行Prompt的时候遇到的问题。 | [link](https://www.bilibili.com/video/BV1aD421W73q/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / |[link](https://mp.weixin.qq.com/s/CTVpcpKZA3E6oZftwpdgEA) | +| 第二十三讲 | 多维度混合并行自动搜索优化策略 | 议题一·时间损失模型及改进多维度二分法/议题二·APSS算法应用 | [上](https://www.bilibili.com/video/BV1if421X7jB/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) [下](https://www.bilibili.com/video/BV1QM4m1z7FV/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | [link](https://mp.weixin.qq.com/s/8OufiPX4FLbgj8ztnckcWQ) | +|第二十四讲 | 书生.浦语大模型开源全链工具链简介与智能体开发体验| 在本期课程中,我们有幸邀请到了书生.浦语社区技术运营、技术布道师闻星老师,以及昇思MindSpore技术布道师耿力老师,来详细解读书生.浦语大模型开源全链路工具链,演示如何对书生.浦语进行微调、推理以及智能体开发实操。| [link](https://www.bilibili.com/video/BV1K4421w7Ha/?spm_id_from=333.999.0.0&vd_source=eb3a45e6eb4dccc5795f97586b78f429) | / | [link](https://mp.weixin.qq.com/s/uh_RIThOEzkkWVbK_RBALQ) | +| 第二十五讲 | RAG | | | | | +| 第二十六讲 | LangChain模块解析 | 解析Models、Prompts、Memory、Chains、Agents、Indexes、Callbacks模块,及案例分析 | | | | +| 第二十七讲 | RWKV5-6 | / | | | | +| 第二十八讲 | 量化 | 介绍低比特量化等相关模型量化技术| | | | + + + +### 昇思资源一览:生态与伙伴共建、共享、共荣 + +
resources
+ +### 加入我们 + +
+ + + + + diff --git a/README_ZH.md b/README_ZH.md new file mode 100644 index 0000000..f90beab --- /dev/null +++ b/README_ZH.md @@ -0,0 +1,87 @@ +
+

课程名称

+

View English

+
+ +(1-2句话点名项目核心价值)项目仓介绍。 + +## 📢 最新消息 + +- 2025-10-21 「课程更新」:新增XXX课程,包含完整视频、课件及代码案例。([查看详情](xxxx)) +- 2025-10-18 「功能优化」:项目仓完成重构,查找课程资源更清晰,新增PR检查门禁,合入内容更规范。([查看详情](xxx)) +- 2025-10-10 「Bug修复」:修复xxxxxx问题,感谢@username的PR贡献。([查看详情](xxxx)) + +## 前置知识 + +在学习本门课程之前,您需要掌握: + +- Python基础 +- Linux命令基础 +- Jupyter基础 +- Docker镜像使用 + +您可以通过[前置学习考试](考试链接)进行自检。 + +## 环境准备 + +为确保项目仓中实践代码可正常运行,推荐以下环境准备方式。更详细的环境准备指导详见[Wiki](wiki链接)。 + +### 直接安装依赖 + +请先确保 Python 版本符合[课程要求](#版本维护)后,进入仓库根目录,执行: + +```bash +pip install requirements.txt +``` + +### 使用Docker镜像 + +为方便开发者更加便捷地进行代码实践,节约环境准备的时间,我们提供了预装好的基础Dockerfile文件。课程的所有镜像可从[dockerfile](./dockerfile/)获取。本课程镜像文件信息如下,开发者可根据实际需求进行拉取: + +| 硬件平台 | 镜像名称 | 标签 | 说明 | Dockerfile文件 | +| :------ | :-------------- | :------- | :------------------------ | :------------- | +| CPU | xxx | xxx | xxx | xxx | +| GPU | xxx | xxx | xxx | xxx | +| NPU | xxx | xxx | xxx | xxx | + +镜像基础使用教程详见环境准备Wiki中的[Docker镜像使用](./dockerfile/README.md)部分。 + +## 课程内容 + +| 序号 | 课节 | 简介 | 课程资源 | 能力认证入口 | +| :-- | :------ | :--------------- | :----------------------- | :---------- | +| 1 | xxx | xxx | [PPT](跳转链接) · [代码](跳转链接) · [视频](跳转链接) · [云沙箱实验](跳转链接) · [学习路径](跳转链接) | | +| 2 | xxx | xxx | [PPT](跳转链接) · [代码](跳转链接) · [视频](跳转链接) · [云沙箱实验](跳转链接) · [学习路径](跳转链接) | [初级认证入口](xxxx) | +| 3 | xxx | xxx | [PPT](跳转链接) · [代码](跳转链接) · [视频](跳转链接) · [云沙箱实验](跳转链接) · [学习路径](跳转链接) | | +| 4 | xxx | xxx | [PPT](跳转链接) · [代码](跳转链接) · [视频](跳转链接) · [云沙箱实验](跳转链接) · [学习路径](跳转链接) | [中级认证入口](xxxx) | + +## 版本维护 + +项目随昇思MindSpore及昇思MindSpore NLP套件迭代同步发布版本,本项目仓每**半年**进行版本发布。 + +| 版本名 | Python | MindSpore | MindSpore NLP | +| :----- | :----- |:------ |:------ | +| master | xxx | xxx | xxx | +| r1.0 | xxx | xxx | xxx | + +## 常见问题(FAQ) + +详见Wiki中[FAQ](wiki链接)。 + +## 贡献与反馈 + +欢迎各位开发者通过 [Issue](Issue链接) 提交建议或 bug 反馈,也可直接发起 [PR](PR链接) 进行Bug修复或代码贡献(提交前请参考提交规范,由Committer @username 完成评审合入),你的每一份参与都能让本项目更加完善。 + +### 提交规范 + +详见WIKI:[Issue与PR提交规范](WIKI链接) + +### 贡献者展示 + +向本项目的贡献者们致以最诚挚的感谢! + +
+ + + +
diff --git a/Season1.step_into_chatgpt/0.Course-Review/3-GPT.md b/Season1.step_into_chatgpt/0.Course-Review/3-GPT.md index 8237000..5840162 100644 --- a/Season1.step_into_chatgpt/0.Course-Review/3-GPT.md +++ b/Season1.step_into_chatgpt/0.Course-Review/3-GPT.md @@ -9,7 +9,7 @@ GPT-1是更早于BERT提出了预训练语言模型(Pre-train+Fine-tune)的 ## 1. 课程回顾 - Semi-Supervised Learning -- Unsupervised Pretraining +- Unsupervised Pretraining - 模型预训练优化目标 - 模型结构 - Supervised Fine-tuning diff --git a/Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md b/Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md index a6898e6..61e4f0f 100644 --- a/Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md +++ b/Season1.step_into_chatgpt/0.Course-Review/5-Parallel.md @@ -50,13 +50,13 @@ - 内存优化 1. 重计算 - +
recompute
时间换空间:重计算技术可以不保存正向计算结果,让该内存可以被复用,然后在计算反向部分时,重新计算出正向结果。 - + 2. 优化器并行——ZeRO - +
optimizer-parallel
将参数和梯度分组放到不同卡上更新,再通过通信广播操作在设备间共享更新后的权值。 diff --git a/Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md b/Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md index a4b8883..2d5fce9 100644 --- a/Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md +++ b/Season1.step_into_chatgpt/0.Course-Review/6-CodeGeex.md @@ -70,11 +70,11 @@ 2. 目前的基准从多任务及多语言两个方面对模型进行评价 - 多任务 - + 通过不同应用场景进行评价,多使用CodeBLEU/BLEU评价相似性 - 多语言 - + 在不同编程语言下评价代码正确性,如HumanEval(仅支持Python)、MultiPL-E(支持16种语言,但为自动翻译并不支持多任务) 3. HumanEval-X:新的多语言代码生成基准 diff --git a/Season1.step_into_chatgpt/1.Transformer/transformer-new.ipynb b/Season1.step_into_chatgpt/1.Transformer/transformer-new.ipynb index a9bb5e4..419de78 100644 --- a/Season1.step_into_chatgpt/1.Transformer/transformer-new.ipynb +++ b/Season1.step_into_chatgpt/1.Transformer/transformer-new.ipynb @@ -302,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" @@ -353,14 +353,14 @@ "\n", " embed_size = query.shape[-1]\n", " scaling_factor = self.sqrt(Tensor(embed_size, mstype.float32))\n", - " \n", + "\n", "\n", " attn = ops.matmul(query, key.swapaxes(-2, -1) / scaling_factor)\n", "\n", "\n", " if attn_mask is not None:\n", " attn = attn.masked_fill(attn_mask, -1e9)\n", - " \n", + "\n", " attn = self.softmax(attn)\n", "\n", " attn = self.dropout(attn)\n", @@ -868,7 +868,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "fragment" @@ -881,7 +881,7 @@ " super().__init__()\n", " self.layer_norm = nn.LayerNorm((d_model, ), epsilon=1e-5)\n", " self.dropout = nn.Dropout(p=dropout_p)\n", - " \n", + "\n", " def construct(self, x, residual):\n", " return self.layer_norm(self.dropout(x) + residual)" ] @@ -926,7 +926,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "fragment" @@ -944,7 +944,7 @@ " self.pos_ffn = 
PoswiseFeedForward(d_ff, d_model, dropout_p)\n", " self.add_norm1 = AddNorm(d_model, dropout_p)\n", " self.add_norm2 = AddNorm(d_model, dropout_p)\n", - " \n", + "\n", " def construct(self, enc_inputs, enc_self_attn_mask):\n", " \"\"\"\n", " enc_inputs: [batch_size, src_len, d_model]\n", @@ -1004,7 +1004,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "fragment" @@ -1020,7 +1020,7 @@ " self.layers = nn.CellList([EncoderLayer(d_model, n_heads, d_ff, dropout_p) for _ in range(n_layers)])\n", " self.scaling_factor = ops.Sqrt()(Tensor(d_model, mstype.float32))\n", "\n", - " \n", + "\n", " def construct(self, enc_inputs, src_pad_idx):\n", " \"\"\"enc_inputs : [batch_size, src_len]\n", " \"\"\"\n", @@ -1098,7 +1098,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" @@ -1108,7 +1108,7 @@ "source": [ "def get_attn_subsequent_mask(seq_q, seq_k):\n", " \"\"\"生成时间掩码,使decoder在第t时刻只能看到序列的前t-1个元素\n", - " \n", + "\n", " Args:\n", " seq_q (Tensor): query序列,shape = [batch size, len_q]\n", " seq_k (Tensor): key序列,shape = [batch size, len_k]\n", @@ -1162,7 +1162,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "fragment" @@ -1182,7 +1182,7 @@ " self.add_norm1 = AddNorm(d_model, dropout_p)\n", " self.add_norm2 = AddNorm(d_model, dropout_p)\n", " self.add_norm3 = AddNorm(d_model, dropout_p)\n", - " \n", + "\n", " def construct(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\n", " \"\"\"\n", " dec_inputs: [batch_size, trg_len, d_model]\n", @@ -1196,7 +1196,7 @@ "\n", " dec_outputs = self.add_norm1(dec_outputs, residual)\n", " residual = dec_outputs\n", - " \n", + "\n", " dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\n", "\n", " dec_outputs = self.add_norm2(dec_outputs, 
residual)\n", @@ -1251,7 +1251,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "fragment" @@ -1266,8 +1266,8 @@ " self.pos_emb = PositionalEncoding(d_model, dropout_p)\n", " self.layers = nn.CellList([DecoderLayer(d_model, n_heads, d_ff) for _ in range(n_layers)])\n", " self.projection = nn.Dense(d_model, trg_vocab_size)\n", - " self.scaling_factor = ops.Sqrt()(Tensor(d_model, mstype.float32)) \n", - " \n", + " self.scaling_factor = ops.Sqrt()(Tensor(d_model, mstype.float32))\n", + "\n", " def construct(self, dec_inputs, enc_inputs, enc_outputs, src_pad_idx, trg_pad_idx):\n", " \"\"\"\n", " dec_inputs: [batch_size, trg_len]\n", @@ -1308,7 +1308,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "fragment" @@ -1321,7 +1321,7 @@ " super().__init__()\n", " self.encoder = encoder\n", " self.decoder = decoder\n", - " \n", + "\n", " def construct(self, enc_inputs, dec_inputs, src_pad_idx, trg_pad_idx):\n", " \"\"\"\n", " enc_inputs: [batch_size, src_len]\n", @@ -1333,8 +1333,7 @@ "\n", " dec_logits = dec_outputs.view((-1, dec_outputs.shape[-1]))\n", "\n", - " return dec_logits, enc_self_attns, dec_self_attns, dec_enc_attns\n", - " " + " return dec_logits, enc_self_attns, dec_self_attns, dec_enc_attns\n" ] }, { @@ -1489,7 +1488,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" @@ -1501,18 +1500,18 @@ "\n", "class Multi30K():\n", " \"\"\"Multi30K数据集加载器\n", - " \n", + "\n", " 加载Multi30K数据集并处理为一个Python迭代对象。\n", - " \n", + "\n", " \"\"\"\n", " def __init__(self, path):\n", " self.data = self._load(path)\n", - " \n", + "\n", " def _load(self, path):\n", " def tokenize(text):\n", " text = text.rstrip()\n", " return [tok.lower() for tok in re.findall(r'\\w+|[^\\w\\s]', text)]\n", - " \n", + "\n", " members = {i.split('.')[-1]: i for i in 
os.listdir(path)}\n", " de_path = os.path.join(path, members['de'])\n", " en_path = os.path.join(path, members['en'])\n", @@ -1524,10 +1523,10 @@ " en = [tokenize(i) for i in en]\n", "\n", " return list(zip(de, en))\n", - " \n", + "\n", " def __getitem__(self, idx):\n", " return self.data[idx]\n", - " \n", + "\n", " def __len__(self):\n", " return len(self.data)" ] @@ -2378,7 +2377,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" @@ -2391,13 +2390,13 @@ " enc_inputs: [batch_size(1), src_len]\n", " \"\"\"\n", " new_model.set_train(False)\n", - " \n", + "\n", " # 对输入句子进行分词\n", " if isinstance(sentence, str):\n", " tokens = [tok.lower() for tok in re.findall(r'\\w+|[^\\w\\s]', sentence.rstrip())]\n", " else:\n", " tokens = [token.lower() for token in sentence]\n", - " \n", + "\n", " # 补充起始、终止占位符,统一序列长度\n", " if len(tokens) > max_len - 2:\n", " src_len = max_len\n", @@ -2405,24 +2404,24 @@ " else:\n", " src_len = len(tokens) + 2\n", " tokens = [''] + tokens + [''] + [''] * (max_len - src_len)\n", - " \n", + "\n", " # 将德语单词转换为数字索引,并进一步转换为tensor\n", " # enc_inputs: [1, src_len]\n", " indexes = de_vocab.encode(tokens)\n", " enc_inputs = Tensor(indexes, mstype.float32).expand_dims(0)\n", - " \n", + "\n", " # 将输入送入encoder,获取信息\n", " enc_outputs, _ = new_model.encoder(enc_inputs, src_pad_idx)\n", "\n", " dec_inputs = Tensor([[en_vocab.bos_idx]], mstype.float32)\n", - " \n", + "\n", " # 初始化decoder输入,此时仅有句首占位符\n", " # dec_inputs: [1, 1]\n", " max_len = enc_inputs.shape[1]\n", " for _ in range(max_len):\n", " dec_outputs, _, _ = new_model.decoder(dec_inputs, enc_inputs, enc_outputs, src_pad_idx, trg_pad_idx)\n", " dec_logits = dec_outputs.view((-1, dec_outputs.shape[-1]))\n", - " \n", + "\n", " # 找到下一个词的概率分布,并输出预测\n", " dec_logits = dec_logits[-1, :]\n", " pred = dec_logits.argmax(axis=0).expand_dims(0).expand_dims(0)\n", @@ -2511,7 +2510,7 @@ }, { "cell_type": "code", - "execution_count": 45, 
+ "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" @@ -2532,16 +2531,16 @@ "def calculate_bleu(dataset, max_len=50):\n", " trgs = []\n", " pred_trgs = []\n", - " \n", + "\n", " for data in dataset[:10]:\n", - " \n", + "\n", " src = data[0]\n", " trg = data[1]\n", "\n", " pred_trg = inference(src, max_len)\n", " pred_trgs.append(pred_trg)\n", " trgs.append([trg])\n", - " \n", + "\n", " return corpus_bleu(trgs, pred_trgs)\n", "\n", "bleu_score = calculate_bleu(test_dataset)\n", diff --git a/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb b/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb index 6ceed46..db40ebd 100644 --- a/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb +++ b/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb @@ -290,7 +290,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "tags": [] }, @@ -302,7 +302,7 @@ " is_ascend = mindspore.get_context('device_target') == 'Ascend'\n", "\n", " column_names = [\"label\", \"text_a\"]\n", - " \n", + "\n", " dataset = GeneratorDataset(source, column_names=column_names, shuffle=shuffle)\n", " # transforms\n", " type_cast_op = transforms.TypeCast(mindspore.int32)\n", @@ -569,7 +569,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "tags": [] }, @@ -580,12 +580,12 @@ "def compute_metrics(eval_pred):\n", " predictions = eval_pred.predictions\n", " labels = eval_pred.label_ids\n", - " \n", + "\n", " if len(predictions.shape) > 1:\n", " predictions = np.argmax(predictions, axis=-1)\n", "\n", " accuracy = (predictions == labels).mean()\n", - " \n", + "\n", " return {\"accuracy\": float(accuracy)}" ] }, diff --git a/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb b/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb index 99ba599..d2d875d 100644 --- a/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb +++ 
b/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb @@ -422,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "d04340e2", "metadata": { "slideshow": { @@ -456,7 +456,7 @@ " position_ids = position_ids.expand_dims(0).expand_as(input_ids)\n", " if token_type_ids is None:\n", " token_type_ids = ops.zeros_like(input_ids)\n", - " \n", + "\n", " words_embeddings = self.word_embeddings(input_ids)\n", " position_embeddings = self.position_embeddings(position_ids)\n", " token_type_embeddings = self.token_type_embeddings(token_type_ids)\n", @@ -860,7 +860,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "53d4e932", "metadata": { "slideshow": { @@ -897,7 +897,7 @@ " self.dense = nn.Dense(config.hidden_size, config.hidden_size, weight_init=TruncatedNormal(config.initializer_range))\n", " self.transform_act_fn = activation_map.get(config.hidden_act, nn.GELU(False))\n", " self.layer_norm = nn.LayerNorm((config.hidden_size,), epsilon=config.layer_norm_eps)\n", - " \n", + "\n", " def construct(self, hidden_states):\n", " hidden_states = self.dense(hidden_states)\n", " hidden_states = self.transform_act_fn(hidden_states)\n", @@ -991,7 +991,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "e1784651", "metadata": { "slideshow": { @@ -1004,7 +1004,7 @@ " def __init__(self, config):\n", " super(BertPooler, self).__init__()\n", " self.dense = nn.Dense(config.hidden_size, config.hidden_size, activation='tanh', weight_init=TruncatedNormal(config.initializer_range))\n", - " \n", + "\n", " def construct(self, hidden_states):\n", " first_token_tensor = hidden_states[:, 0]\n", " pooled_output = self.dense(first_token_tensor)\n", @@ -1027,7 +1027,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "edc1f2a8", "metadata": { "slideshow": { @@ -1041,7 +1041,7 @@ " super(BertPreTrainingHeads, self).__init__()\n", " self.predictions 
= BertLMPredictionHead(config)\n", " self.seq_relationship = nn.Dense(config.hidden_size, 2, weight_init=TruncatedNormal(config.initializer_range))\n", - " \n", + "\n", " def construct(self, sequence_output, pooled_output, masked_lm_positions):\n", " prediction_scores = self.predictions(sequence_output, masked_lm_positions)\n", " seq_relationship_score = self.seq_relationship(pooled_output)\n", diff --git a/Season1.step_into_chatgpt/3.GPT/gpt_imdb_finetune.ipynb b/Season1.step_into_chatgpt/3.GPT/gpt_imdb_finetune.ipynb index 66ecb2d..41856e7 100644 --- a/Season1.step_into_chatgpt/3.GPT/gpt_imdb_finetune.ipynb +++ b/Season1.step_into_chatgpt/3.GPT/gpt_imdb_finetune.ipynb @@ -589,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "tags": [] }, @@ -616,7 +616,7 @@ "\n", " acc = compute_accuracy(logits, label)['accuracy']\n", " epoch_acc += acc\n", - " \n", + "\n", " step_total += 1\n", " acc=epoch_acc/step_total\n", "\n", diff --git a/Season1.step_into_chatgpt/4.GPT2/gpt2_modules.ipynb b/Season1.step_into_chatgpt/4.GPT2/gpt2_modules.ipynb index 9aa4b00..dc0bcb2 100644 --- a/Season1.step_into_chatgpt/4.GPT2/gpt2_modules.ipynb +++ b/Season1.step_into_chatgpt/4.GPT2/gpt2_modules.ipynb @@ -181,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "2d315a4e-5663-404e-b93d-efb1cf354414", "metadata": { "tags": [] @@ -212,7 +212,7 @@ "from mindnlp.transformers.ms_utils import Conv1D\n", "\n", "# query = Wq * X, key = Wk * X, value = Wv * X\n", - "# c_attn: (1, 10, 768*3) --> query, key, value: (1, 10, 768), (1, 10, 768), (1, 10, 768) \n", + "# c_attn: (1, 10, 768*3) --> query, key, value: (1, 10, 768), (1, 10, 768), (1, 10, 768)\n", "c_attn = Conv1D(3 * embed_dim, embed_dim)\n", "query, key, value = split(c_attn(x), embed_dim, axis=2)\n", "query.shape, key.shape, value.shape" @@ -230,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": 
"abb7ccac-7cfe-401a-ab32-763de70b4669", "metadata": { "tags": [] @@ -245,7 +245,7 @@ " new_shape = tensor.shape[:-1] + (num_heads, attn_head_size)\n", " tensor = tensor.view(new_shape)\n", " # (batch_size, seq_len, num_heads, attn_head_size) --> (batch_size, num_heads, seq_len, attn_head_size)\n", - " return ops.transpose(tensor, (0, 2, 1, 3)) " + " return ops.transpose(tensor, (0, 2, 1, 3))" ] }, { diff --git a/Season1.step_into_chatgpt/4.GPT2/gpt2_summarization.ipynb b/Season1.step_into_chatgpt/4.GPT2/gpt2_summarization.ipynb index 7daef40..2a5a842 100644 --- a/Season1.step_into_chatgpt/4.GPT2/gpt2_summarization.ipynb +++ b/Season1.step_into_chatgpt/4.GPT2/gpt2_summarization.ipynb @@ -320,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "f1ee1961-0658-4e70-95c2-81fefd83a40b", "metadata": {}, "outputs": [], @@ -340,7 +340,7 @@ " tokenized = tokenizer(text=article, text_pair=summary,\n", " padding='max_length', truncation='only_first', max_length=max_seq_len)\n", " return tokenized['input_ids'], tokenized['input_ids']\n", - " \n", + "\n", " dataset = dataset.map(read_map, 'text', ['article', 'summary'])\n", " # change column names to input_ids and labels for the following training\n", " dataset = dataset.map(merge_and_pad, ['article', 'summary'], ['input_ids', 'labels'])\n", @@ -608,7 +608,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "88259c93-5366-4406-a417-396808ec767c", "metadata": {}, "outputs": [], @@ -624,7 +624,7 @@ " learning_rate=learning_rate,\n", " max_grad_norm=max_grad_norm,\n", " warmup_steps=warmup_steps\n", - " \n", + "\n", ")\n", "\n", "from mindnlp.engine import Trainer\n", @@ -696,7 +696,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "787795ec-0c07-4be6-97b7-4defbe899117", "metadata": {}, "outputs": [], @@ -712,7 +712,7 @@ "\n", " dataset = dataset.map(read_map, 'text', ['article', 'summary'])\n", " dataset = 
dataset.map(pad, 'article', ['input_ids'])\n", - " \n", + "\n", " dataset = dataset.batch(batch_size)\n", "\n", " return dataset"