From 89d2fc9faf4e88e04b9b01fee8ae91a020742d85 Mon Sep 17 00:00:00 2001
From: Haz Sameen Shahgir <83033987+Patchwork53@users.noreply.github.com>
Date: Fri, 12 Apr 2024 15:06:26 +0600
Subject: [PATCH 1/5] new python file to upload mosaic-bert to hf

---
 convert_mosaicbert_to_hf.py | 286 ++++++++++++++++++++++++++++++++++++
 1 file changed, 286 insertions(+)
 create mode 100644 convert_mosaicbert_to_hf.py

diff --git a/convert_mosaicbert_to_hf.py b/convert_mosaicbert_to_hf.py
new file mode 100644
index 0000000000..654f53fb45
--- /dev/null
+++ b/convert_mosaicbert_to_hf.py
@@ -0,0 +1,286 @@
+# Copyright 2022 MosaicML LLM Foundry authors
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import tempfile
+from argparse import ArgumentParser, Namespace
+from pathlib import Path
+from typing import Optional, Tuple, Union
+import json
+import torch
+import transformers
+from composer.models.huggingface import get_hf_config_from_composer_state_dict
+from composer.utils import (get_file,
+                            parse_uri, safe_torch_load)
+from transformers import PretrainedConfig, PreTrainedTokenizerBase, AutoModelForMaskedLM
+import requests
+
+def download_file(url, filename):
+    response = requests.get(url)
+    
+    if response.status_code == 200:
+        with open(filename, 'wb') as file:
+            file.write(response.content)
+        print(f'File downloaded as {filename}')
+    else:
+        print(f'Failed to download file. Status code: {response.status_code}')
+
+from llmfoundry.utils.huggingface_hub_utils import \
+    edit_files_for_hf_compatibility
+
+
+def write_huggingface_pretrained_from_composer_checkpoint(
+    checkpoint_path: Union[Path, str],
+    output_path: Union[Path, str],
+    trust_remote_code: bool,
+    output_precision: str = 'fp32',
+    local_checkpoint_save_location: Optional[Union[Path, str]] = None,
+    bert_config_path: Optional[str] = None,
+) -> Tuple[PretrainedConfig, Optional[PreTrainedTokenizerBase]]:
+    """Convert a Composer checkpoint to a pretrained HF checkpoint folder.
+
+    Write a ``config.json`` and ``pytorch_model.bin``, like
+    :meth:`transformers.PreTrainedModel.from_pretrained` expects, from a
+    composer checkpoint.
+
+    Args:
+        checkpoint_path (Union[Path, str]): Path to the composer checkpoint, can be a local path, or a remote path beginning with ``s3://``, or another backend
+            supported by :meth:`composer.utils.maybe_create_object_store_from_uri`.
+        output_path (Union[Path, str]): Path to the folder to write the output to.
+        trust_remote_code (bool): Whether or not to use code outside of the transformers module.
+        output_precision (str, optional): The precision of the output weights saved to `pytorch_model.bin`. Can be one of ``fp32``, ``fp16``, or ``bf16``.
+        local_checkpoint_save_location (Optional[Union[Path, str]], optional): If specified, where to save the checkpoint file to locally.
+                                                                                If the input ``checkpoint_path`` is already a local path, this will be a symlink.
+                                                                                Defaults to None, which will use a temporary file.
+    """
+    dtype = {
+        'fp32': torch.float32,
+        'fp16': torch.float16,
+        'bf16': torch.bfloat16,
+    }[output_precision]
+
+    # default local path to a tempfile if path is not provided
+    if local_checkpoint_save_location is None:
+        tmp_dir = tempfile.TemporaryDirectory()
+        local_checkpoint_save_location = Path(
+            tmp_dir.name) / 'local-composer-checkpoint.pt'
+
+    # create folder
+    os.makedirs(output_path)
+
+    # download the checkpoint file
+    print(
+        f'Downloading checkpoint from {checkpoint_path} -> {local_checkpoint_save_location}'
+    )
+    get_file(str(checkpoint_path), str(local_checkpoint_save_location))
+
+    # Load the Composer checkpoint state dict
+    print('Loading checkpoint into CPU RAM...')
+    composer_state_dict = safe_torch_load(local_checkpoint_save_location)
+
+    if bert_config_path is not None:
+        #json
+        with open(bert_config_path, 'r') as f:
+            bert_config = json.load(f)   
+        composer_state_dict["state"]["integrations"]["huggingface"]["model"]["config"]["content"] = bert_config
+
+    else:
+        # placeholder config from mosaicml/mosaic-bert-base
+        composer_state_dict["state"]["integrations"]["huggingface"] = {
+            "model":{"config":{"content":{
+                "_name_or_path": "mosaicml/mosaic-bert",
+                "alibi_starting_size": 512,
+                "architectures": [
+                    "BertForMaskedLM"
+                ],
+                "attention_probs_dropout_prob": 0.1,
+                "auto_map": {
+                    "AutoConfig": "configuration_bert.BertConfig",
+                    "AutoModelForMaskedLM": "bert_layers.BertForMaskedLM"
+                },
+                "classifier_dropout": None,
+                "gradient_checkpointing": False,
+                "hidden_act": "gelu",
+                "hidden_dropout_prob": 0.1,
+                "hidden_size": 768,
+                "initializer_range": 0.02,
+                "intermediate_size": 3072,
+                "layer_norm_eps": 1e-12,
+                "max_position_embeddings": 512,
+                "model_type": "bert",
+                "num_attention_heads": 12,
+                "num_hidden_layers": 12,
+                "pad_token_id": 0,
+                "position_embedding_type": "absolute",
+                "torch_dtype": "float32",
+                "transformers_version": "4.26.0",
+                "type_vocab_size": 2,
+                "use_cache": False,
+                "vocab_size": 30522
+
+        }}},
+        "tokenizer":{}
+    }
+    if 'state' not in composer_state_dict:
+        raise RuntimeError(
+            f'"state" is not an available key in the provided composer checkpoint. Is {local_checkpoint_save_location} ill-formed?'
+        )
+
+    # Build and save HF Config
+    print('#' * 30)
+    print('Saving HF Model Config...')
+    hf_config = get_hf_config_from_composer_state_dict(composer_state_dict)
+    hf_config.torch_dtype = dtype
+    hf_config.save_pretrained(output_path)
+    print(hf_config)
+
+    # Extract the HF model weights
+    print('#' * 30)
+    print('Saving HF Model Weights...')
+    weights_state_dict = composer_state_dict
+    if 'state' in weights_state_dict:
+        weights_state_dict = weights_state_dict['state']['model']
+    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(
+        weights_state_dict, prefix='model.')
+
+    # Convert weights to desired dtype
+    for k, v in weights_state_dict.items():
+        if isinstance(v, torch.Tensor):
+            weights_state_dict[k] = v.to(dtype=dtype)
+
+    # Save weights
+    torch.save(weights_state_dict, Path(output_path) / 'pytorch_model.bin')
+
+    print('#' * 30)
+    print(f'HF checkpoint folder successfully created at {output_path}.')
+
+    return hf_config
+
+
+def parse_args() -> Namespace:
+    """Parse commandline arguments."""
+    parser = ArgumentParser(
+        description=
+        'Convert a HuggingFace causal LM in a Composer checkpoint into a standard HuggingFace checkpoint folder, and optionally upload to the hub.'
+    )
+    parser.add_argument('--composer_path', type=str, required=True)
+    parser.add_argument('--hf_output_path', type=str, required=True)
+    
+    parser.add_argument('--bert_config_path', type=str)
+    parser.add_argument('--local_checkpoint_save_location',
+                        type=str,
+                        default=None)
+    parser.add_argument('--output_precision',
+                        type=str,
+                        choices=['fp32', 'fp16', 'bf16'],
+                        default='fp32')
+    parser.add_argument('--hf_repo_for_upload', type=str, default=None)
+    parser.add_argument('--test_uploaded_model', action='store_true')
+    parser.add_argument(
+        '--trust_remote_code',
+        action='store_true',
+        help='Whether or not to use code outside of transformers module.')
+
+    return parser.parse_args()
+
+
+def _convert_composer_to_hf(args: Namespace) -> None:
+    print()
+    print('#' * 30)
+    print('Converting Composer checkpoint to HuggingFace checkpoint format...')
+
+    _, _, local_folder_path = parse_uri(args.hf_output_path)
+
+    config = write_huggingface_pretrained_from_composer_checkpoint(
+        checkpoint_path=args.composer_path,
+        output_path=local_folder_path,
+        trust_remote_code=args.trust_remote_code,
+        output_precision=args.output_precision,
+        local_checkpoint_save_location=args.local_checkpoint_save_location,
+        bert_config_path=args.bert_config_path)
+
+
+    dtype = {
+        'fp32': torch.float32,
+        'fp16': torch.float16,
+        'bf16': torch.bfloat16,
+    }[args.output_precision]
+
+    print(f'Loading model from {local_folder_path}')
+
+    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/bert_layers.py", f"{local_folder_path}/bert_layers.py")
+    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/bert_padding.py", f"{local_folder_path}/bert_padding.py")
+    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/configuration_bert.py", f"{local_folder_path}/configuration_bert.py")
+    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/flash_attn_triton.py", f"{local_folder_path}/flash_attn_triton.py")
+
+
+    config = transformers.BertConfig.from_pretrained(local_folder_path)
+    loaded_hf_model = AutoModelForMaskedLM.from_pretrained(local_folder_path,config=config,trust_remote_code=True)
+
+
+    loaded_hf_model.save_pretrained(local_folder_path)
+
+    # Only need to edit files for MPT because it has custom code
+    if config.model_type == 'mpt':
+        print('Editing files for HF compatibility...')
+        edit_files_for_hf_compatibility(local_folder_path)
+
+    if args.hf_repo_for_upload is not None:
+        from huggingface_hub import HfApi
+        api = HfApi()
+
+        print(
+            f'Uploading {args.hf_output_path} to HuggingFace Hub at {args.hf_repo_for_upload}'
+        )
+        api.create_repo(repo_id=args.hf_repo_for_upload,
+                        use_auth_token=True,
+                        repo_type='model',
+                        private=True,
+                        exist_ok=True)
+        print('Repo created.')
+
+        # ignore the full checkpoint file if we now have sharded checkpoint files
+        ignore_patterns = []
+        if any(
+                f.startswith('pytorch_model-00001')
+                for f in os.listdir(args.hf_output_path)):
+            ignore_patterns.append('pytorch_model.bin')
+
+        api.upload_folder(folder_path=args.hf_output_path,
+                          repo_id=args.hf_repo_for_upload,
+                          use_auth_token=True,
+                          repo_type='model',
+                          ignore_patterns=ignore_patterns)
+        print('Folder uploaded.')
+
+        if args.test_uploaded_model:
+            print('Testing uploaded model...')
+            hub_config = transformers.BertConfig.from_pretrained(args.hf_repo_for_upload)
+            hub_model = AutoModelForMaskedLM.from_pretrained(args.hf_repo_for_upload,config=hub_config,trust_remote_code=True)
+
+
+            assert sum(p.numel() for p in hub_model.parameters()) == sum(
+                p.numel() for p in loaded_hf_model.parameters())
+            assert all(
+                str(type(module1)).split('.')[-2:] == str(type(module2)).split(
+                    '.')[-2:] for module1, module2 in zip(
+                        hub_model.modules(), loaded_hf_model.modules()))
+
+            assert next(
+                hub_model.parameters()
+            ).dtype == dtype, f'Expected model dtype to be {dtype}, but got {next(hub_model.parameters()).dtype}'
+            
+    print(
+        'Composer checkpoint successfully converted to HuggingFace checkpoint format.'
+    )
+
+
+def convert_composer_to_hf(args: Namespace) -> None:
+
+    try:
+        _convert_composer_to_hf(args)
+    except Exception as e:
+        raise e
+
+if __name__ == '__main__':
+    convert_composer_to_hf(parse_args())

From 66595311539ce839ac96937733c5c2139d7ed0bb Mon Sep 17 00:00:00 2001
From: Haz Sameen Shahgir <83033987+Patchwork53@users.noreply.github.com>
Date: Sat, 13 Apr 2024 07:37:17 +0000
Subject: [PATCH 2/5] added typing annotation

---
 scripts/inference/convert_mosaicbert_to_hf.py | 286 ++++++++++++++++++
 1 file changed, 286 insertions(+)
 create mode 100644 scripts/inference/convert_mosaicbert_to_hf.py

diff --git a/scripts/inference/convert_mosaicbert_to_hf.py b/scripts/inference/convert_mosaicbert_to_hf.py
new file mode 100644
index 0000000000..04cd9f2b8b
--- /dev/null
+++ b/scripts/inference/convert_mosaicbert_to_hf.py
@@ -0,0 +1,286 @@
+# Copyright 2022 MosaicML LLM Foundry authors
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import tempfile
+from argparse import ArgumentParser, Namespace
+from pathlib import Path
+from typing import Optional, Tuple, Union
+import json
+import torch
+import transformers
+from composer.models.huggingface import get_hf_config_from_composer_state_dict
+from composer.utils import (get_file,
+                            parse_uri, safe_torch_load)
+from transformers import PretrainedConfig, PreTrainedTokenizerBase, AutoModelForMaskedLM
+import requests
+from llmfoundry.utils.huggingface_hub_utils import \
+    edit_files_for_hf_compatibility
+
+
+def download_file(url: str, file_name: Union[Path, str]):
+    response = requests.get(url)
+    
+    if response.status_code == 200:
+        with open(file_name, 'wb') as file:
+            file.write(response.content)
+        print(f'File downloaded as {file_name}')
+    else:
+        print(f'Failed to download file. Status code: {response.status_code}')
+
+
+def write_huggingface_pretrained_from_composer_checkpoint(
+    checkpoint_path: Union[Path, str],
+    output_path: Union[Path, str],
+    trust_remote_code: bool,
+    output_precision: str = 'fp32',
+    local_checkpoint_save_location: Optional[Union[Path, str]] = None,
+    bert_config_path: Optional[str] = None,
+) -> Tuple[PretrainedConfig, Optional[PreTrainedTokenizerBase]]:
+    """Convert a Composer checkpoint to a pretrained HF checkpoint folder.
+
+    Write a ``config.json`` and ``pytorch_model.bin``, like
+    :meth:`transformers.PreTrainedModel.from_pretrained` expects, from a
+    composer checkpoint.
+
+    Args:
+        checkpoint_path (Union[Path, str]): Path to the composer checkpoint, can be a local path, or a remote path beginning with ``s3://``, or another backend
+            supported by :meth:`composer.utils.maybe_create_object_store_from_uri`.
+        output_path (Union[Path, str]): Path to the folder to write the output to.
+        trust_remote_code (bool): Whether or not to use code outside of the transformers module.
+        output_precision (str, optional): The precision of the output weights saved to `pytorch_model.bin`. Can be one of ``fp32``, ``fp16``, or ``bf16``.
+        local_checkpoint_save_location (Optional[Union[Path, str]], optional): If specified, where to save the checkpoint file to locally.
+                                                                                If the input ``checkpoint_path`` is already a local path, this will be a symlink.
+                                                                                Defaults to None, which will use a temporary file.
+    """
+    dtype = {
+        'fp32': torch.float32,
+        'fp16': torch.float16,
+        'bf16': torch.bfloat16,
+    }[output_precision]
+
+    # default local path to a tempfile if path is not provided
+    if local_checkpoint_save_location is None:
+        tmp_dir = tempfile.TemporaryDirectory()
+        local_checkpoint_save_location = Path(
+            tmp_dir.name) / 'local-composer-checkpoint.pt'
+
+    # create folder
+    os.makedirs(output_path)
+
+    # download the checkpoint file
+    print(
+        f'Downloading checkpoint from {checkpoint_path} -> {local_checkpoint_save_location}'
+    )
+    get_file(str(checkpoint_path), str(local_checkpoint_save_location))
+
+    # Load the Composer checkpoint state dict
+    print('Loading checkpoint into CPU RAM...')
+    composer_state_dict = safe_torch_load(local_checkpoint_save_location)
+
+    if bert_config_path is not None:
+        #json
+        with open(bert_config_path, 'r') as f:
+            bert_config = json.load(f)   
+        composer_state_dict["state"]["integrations"]["huggingface"]["model"]["config"]["content"] = bert_config
+
+    else:
+        # placeholder config from mosaicml/mosaic-bert-base
+        composer_state_dict["state"]["integrations"]["huggingface"] = {
+            "model":{"config":{"content":{
+                "_name_or_path": "mosaicml/mosaic-bert",
+                "alibi_starting_size": 512,
+                "architectures": [
+                    "BertForMaskedLM"
+                ],
+                "attention_probs_dropout_prob": 0.1,
+                "auto_map": {
+                    "AutoConfig": "configuration_bert.BertConfig",
+                    "AutoModelForMaskedLM": "bert_layers.BertForMaskedLM"
+                },
+                "classifier_dropout": None,
+                "gradient_checkpointing": False,
+                "hidden_act": "gelu",
+                "hidden_dropout_prob": 0.1,
+                "hidden_size": 768,
+                "initializer_range": 0.02,
+                "intermediate_size": 3072,
+                "layer_norm_eps": 1e-12,
+                "max_position_embeddings": 512,
+                "model_type": "bert",
+                "num_attention_heads": 12,
+                "num_hidden_layers": 12,
+                "pad_token_id": 0,
+                "position_embedding_type": "absolute",
+                "torch_dtype": "float32",
+                "transformers_version": "4.26.0",
+                "type_vocab_size": 2,
+                "use_cache": False,
+                "vocab_size": 30522
+
+        }}},
+        "tokenizer":{}
+    }
+    if 'state' not in composer_state_dict:
+        raise RuntimeError(
+            f'"state" is not an available key in the provided composer checkpoint. Is {local_checkpoint_save_location} ill-formed?'
+        )
+
+    # Build and save HF Config
+    print('#' * 30)
+    print('Saving HF Model Config...')
+    hf_config = get_hf_config_from_composer_state_dict(composer_state_dict)
+    hf_config.torch_dtype = dtype
+    hf_config.save_pretrained(output_path)
+    print(hf_config)
+
+    # Extract the HF model weights
+    print('#' * 30)
+    print('Saving HF Model Weights...')
+    weights_state_dict = composer_state_dict
+    if 'state' in weights_state_dict:
+        weights_state_dict = weights_state_dict['state']['model']
+    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(
+        weights_state_dict, prefix='model.')
+
+    # Convert weights to desired dtype
+    for k, v in weights_state_dict.items():
+        if isinstance(v, torch.Tensor):
+            weights_state_dict[k] = v.to(dtype=dtype)
+
+    # Save weights
+    torch.save(weights_state_dict, Path(output_path) / 'pytorch_model.bin')
+
+    print('#' * 30)
+    print(f'HF checkpoint folder successfully created at {output_path}.')
+
+    return hf_config
+
+
+def parse_args() -> Namespace:
+    """Parse commandline arguments."""
+    parser = ArgumentParser(
+        description=
+        'Convert a HuggingFace causal LM in a Composer checkpoint into a standard HuggingFace checkpoint folder, and optionally upload to the hub.'
+    )
+    parser.add_argument('--composer_path', type=str, required=True)
+    parser.add_argument('--hf_output_path', type=str, required=True)
+    
+    parser.add_argument('--bert_config_path', type=str)
+    parser.add_argument('--local_checkpoint_save_location',
+                        type=str,
+                        default=None)
+    parser.add_argument('--output_precision',
+                        type=str,
+                        choices=['fp32', 'fp16', 'bf16'],
+                        default='fp32')
+    parser.add_argument('--hf_repo_for_upload', type=str, default=None)
+    parser.add_argument('--test_uploaded_model', action='store_true')
+    parser.add_argument(
+        '--trust_remote_code',
+        action='store_true',
+        help='Whether or not to use code outside of transformers module.')
+
+    return parser.parse_args()
+
+
+def _convert_composer_to_hf(args: Namespace) -> None:
+    print()
+    print('#' * 30)
+    print('Converting Composer checkpoint to HuggingFace checkpoint format...')
+
+    _, _, local_folder_path = parse_uri(args.hf_output_path)
+
+    config = write_huggingface_pretrained_from_composer_checkpoint(
+        checkpoint_path=args.composer_path,
+        output_path=local_folder_path,
+        trust_remote_code=args.trust_remote_code,
+        output_precision=args.output_precision,
+        local_checkpoint_save_location=args.local_checkpoint_save_location,
+        bert_config_path=args.bert_config_path)
+
+
+    dtype = {
+        'fp32': torch.float32,
+        'fp16': torch.float16,
+        'bf16': torch.bfloat16,
+    }[args.output_precision]
+
+    print(f'Loading model from {local_folder_path}')
+
+    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/bert_layers.py", f"{local_folder_path}/bert_layers.py")
+    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/bert_padding.py", f"{local_folder_path}/bert_padding.py")
+    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/configuration_bert.py", f"{local_folder_path}/configuration_bert.py")
+    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/flash_attn_triton.py", f"{local_folder_path}/flash_attn_triton.py")
+
+
+    config = transformers.BertConfig.from_pretrained(local_folder_path)
+    loaded_hf_model = AutoModelForMaskedLM.from_pretrained(local_folder_path,config=config,trust_remote_code=True)
+
+
+    loaded_hf_model.save_pretrained(local_folder_path)
+
+    # Only need to edit files for MPT because it has custom code
+    if config.model_type == 'mpt':
+        print('Editing files for HF compatibility...')
+        edit_files_for_hf_compatibility(local_folder_path)
+
+    if args.hf_repo_for_upload is not None:
+        from huggingface_hub import HfApi
+        api = HfApi()
+
+        print(
+            f'Uploading {args.hf_output_path} to HuggingFace Hub at {args.hf_repo_for_upload}'
+        )
+        api.create_repo(repo_id=args.hf_repo_for_upload,
+                        use_auth_token=True,
+                        repo_type='model',
+                        private=True,
+                        exist_ok=True)
+        print('Repo created.')
+
+        # ignore the full checkpoint file if we now have sharded checkpoint files
+        ignore_patterns = []
+        if any(
+                f.startswith('pytorch_model-00001')
+                for f in os.listdir(args.hf_output_path)):
+            ignore_patterns.append('pytorch_model.bin')
+
+        api.upload_folder(folder_path=args.hf_output_path,
+                          repo_id=args.hf_repo_for_upload,
+                          use_auth_token=True,
+                          repo_type='model',
+                          ignore_patterns=ignore_patterns)
+        print('Folder uploaded.')
+
+        if args.test_uploaded_model:
+            print('Testing uploaded model...')
+            hub_config = transformers.BertConfig.from_pretrained(args.hf_repo_for_upload)
+            hub_model = AutoModelForMaskedLM.from_pretrained(args.hf_repo_for_upload,config=hub_config,trust_remote_code=True)
+
+
+            assert sum(p.numel() for p in hub_model.parameters()) == sum(
+                p.numel() for p in loaded_hf_model.parameters())
+            assert all(
+                str(type(module1)).split('.')[-2:] == str(type(module2)).split(
+                    '.')[-2:] for module1, module2 in zip(
+                        hub_model.modules(), loaded_hf_model.modules()))
+
+            assert next(
+                hub_model.parameters()
+            ).dtype == dtype, f'Expected model dtype to be {dtype}, but got {next(hub_model.parameters()).dtype}'
+            
+    print(
+        'Composer checkpoint successfully converted to HuggingFace checkpoint format.'
+    )
+
+
+def convert_composer_to_hf(args: Namespace) -> None:
+
+    try:
+        _convert_composer_to_hf(args)
+    except Exception as e:
+        raise e
+
+if __name__ == '__main__':
+    convert_composer_to_hf(parse_args())

From f29191de25c3afe3e0c0439038b298101f08f033 Mon Sep 17 00:00:00 2001
From: Haz Sameen Shahgir <83033987+Patchwork53@users.noreply.github.com>
Date: Sat, 13 Apr 2024 09:02:03 +0000
Subject: [PATCH 3/5] minor missing key fixes

---
 convert_mosaicbert_to_hf.py                   | 286 ------------------
 scripts/inference/convert_mosaicbert_to_hf.py |   6 +-
 2 files changed, 3 insertions(+), 289 deletions(-)
 delete mode 100644 convert_mosaicbert_to_hf.py

diff --git a/convert_mosaicbert_to_hf.py b/convert_mosaicbert_to_hf.py
deleted file mode 100644
index 654f53fb45..0000000000
--- a/convert_mosaicbert_to_hf.py
+++ /dev/null
@@ -1,286 +0,0 @@
-# Copyright 2022 MosaicML LLM Foundry authors
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-import tempfile
-from argparse import ArgumentParser, Namespace
-from pathlib import Path
-from typing import Optional, Tuple, Union
-import json
-import torch
-import transformers
-from composer.models.huggingface import get_hf_config_from_composer_state_dict
-from composer.utils import (get_file,
-                            parse_uri, safe_torch_load)
-from transformers import PretrainedConfig, PreTrainedTokenizerBase, AutoModelForMaskedLM
-import requests
-
-def download_file(url, filename):
-    response = requests.get(url)
-    
-    if response.status_code == 200:
-        with open(filename, 'wb') as file:
-            file.write(response.content)
-        print(f'File downloaded as {filename}')
-    else:
-        print(f'Failed to download file. Status code: {response.status_code}')
-
-from llmfoundry.utils.huggingface_hub_utils import \
-    edit_files_for_hf_compatibility
-
-
-def write_huggingface_pretrained_from_composer_checkpoint(
-    checkpoint_path: Union[Path, str],
-    output_path: Union[Path, str],
-    trust_remote_code: bool,
-    output_precision: str = 'fp32',
-    local_checkpoint_save_location: Optional[Union[Path, str]] = None,
-    bert_config_path: Optional[str] = None,
-) -> Tuple[PretrainedConfig, Optional[PreTrainedTokenizerBase]]:
-    """Convert a Composer checkpoint to a pretrained HF checkpoint folder.
-
-    Write a ``config.json`` and ``pytorch_model.bin``, like
-    :meth:`transformers.PreTrainedModel.from_pretrained` expects, from a
-    composer checkpoint.
-
-    Args:
-        checkpoint_path (Union[Path, str]): Path to the composer checkpoint, can be a local path, or a remote path beginning with ``s3://``, or another backend
-            supported by :meth:`composer.utils.maybe_create_object_store_from_uri`.
-        output_path (Union[Path, str]): Path to the folder to write the output to.
-        trust_remote_code (bool): Whether or not to use code outside of the transformers module.
-        output_precision (str, optional): The precision of the output weights saved to `pytorch_model.bin`. Can be one of ``fp32``, ``fp16``, or ``bf16``.
-        local_checkpoint_save_location (Optional[Union[Path, str]], optional): If specified, where to save the checkpoint file to locally.
-                                                                                If the input ``checkpoint_path`` is already a local path, this will be a symlink.
-                                                                                Defaults to None, which will use a temporary file.
-    """
-    dtype = {
-        'fp32': torch.float32,
-        'fp16': torch.float16,
-        'bf16': torch.bfloat16,
-    }[output_precision]
-
-    # default local path to a tempfile if path is not provided
-    if local_checkpoint_save_location is None:
-        tmp_dir = tempfile.TemporaryDirectory()
-        local_checkpoint_save_location = Path(
-            tmp_dir.name) / 'local-composer-checkpoint.pt'
-
-    # create folder
-    os.makedirs(output_path)
-
-    # download the checkpoint file
-    print(
-        f'Downloading checkpoint from {checkpoint_path} -> {local_checkpoint_save_location}'
-    )
-    get_file(str(checkpoint_path), str(local_checkpoint_save_location))
-
-    # Load the Composer checkpoint state dict
-    print('Loading checkpoint into CPU RAM...')
-    composer_state_dict = safe_torch_load(local_checkpoint_save_location)
-
-    if bert_config_path is not None:
-        #json
-        with open(bert_config_path, 'r') as f:
-            bert_config = json.load(f)   
-        composer_state_dict["state"]["integrations"]["huggingface"]["model"]["config"]["content"] = bert_config
-
-    else:
-        # placeholder config from mosaicml/mosaic-bert-base
-        composer_state_dict["state"]["integrations"]["huggingface"] = {
-            "model":{"config":{"content":{
-                "_name_or_path": "mosaicml/mosaic-bert",
-                "alibi_starting_size": 512,
-                "architectures": [
-                    "BertForMaskedLM"
-                ],
-                "attention_probs_dropout_prob": 0.1,
-                "auto_map": {
-                    "AutoConfig": "configuration_bert.BertConfig",
-                    "AutoModelForMaskedLM": "bert_layers.BertForMaskedLM"
-                },
-                "classifier_dropout": None,
-                "gradient_checkpointing": False,
-                "hidden_act": "gelu",
-                "hidden_dropout_prob": 0.1,
-                "hidden_size": 768,
-                "initializer_range": 0.02,
-                "intermediate_size": 3072,
-                "layer_norm_eps": 1e-12,
-                "max_position_embeddings": 512,
-                "model_type": "bert",
-                "num_attention_heads": 12,
-                "num_hidden_layers": 12,
-                "pad_token_id": 0,
-                "position_embedding_type": "absolute",
-                "torch_dtype": "float32",
-                "transformers_version": "4.26.0",
-                "type_vocab_size": 2,
-                "use_cache": False,
-                "vocab_size": 30522
-
-        }}},
-        "tokenizer":{}
-    }
-    if 'state' not in composer_state_dict:
-        raise RuntimeError(
-            f'"state" is not an available key in the provided composer checkpoint. Is {local_checkpoint_save_location} ill-formed?'
-        )
-
-    # Build and save HF Config
-    print('#' * 30)
-    print('Saving HF Model Config...')
-    hf_config = get_hf_config_from_composer_state_dict(composer_state_dict)
-    hf_config.torch_dtype = dtype
-    hf_config.save_pretrained(output_path)
-    print(hf_config)
-
-    # Extract the HF model weights
-    print('#' * 30)
-    print('Saving HF Model Weights...')
-    weights_state_dict = composer_state_dict
-    if 'state' in weights_state_dict:
-        weights_state_dict = weights_state_dict['state']['model']
-    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(
-        weights_state_dict, prefix='model.')
-
-    # Convert weights to desired dtype
-    for k, v in weights_state_dict.items():
-        if isinstance(v, torch.Tensor):
-            weights_state_dict[k] = v.to(dtype=dtype)
-
-    # Save weights
-    torch.save(weights_state_dict, Path(output_path) / 'pytorch_model.bin')
-
-    print('#' * 30)
-    print(f'HF checkpoint folder successfully created at {output_path}.')
-
-    return hf_config
-
-
-def parse_args() -> Namespace:
-    """Parse commandline arguments."""
-    parser = ArgumentParser(
-        description=
-        'Convert a HuggingFace causal LM in a Composer checkpoint into a standard HuggingFace checkpoint folder, and optionally upload to the hub.'
-    )
-    parser.add_argument('--composer_path', type=str, required=True)
-    parser.add_argument('--hf_output_path', type=str, required=True)
-    
-    parser.add_argument('--bert_config_path', type=str)
-    parser.add_argument('--local_checkpoint_save_location',
-                        type=str,
-                        default=None)
-    parser.add_argument('--output_precision',
-                        type=str,
-                        choices=['fp32', 'fp16', 'bf16'],
-                        default='fp32')
-    parser.add_argument('--hf_repo_for_upload', type=str, default=None)
-    parser.add_argument('--test_uploaded_model', action='store_true')
-    parser.add_argument(
-        '--trust_remote_code',
-        action='store_true',
-        help='Whether or not to use code outside of transformers module.')
-
-    return parser.parse_args()
-
-
-def _convert_composer_to_hf(args: Namespace) -> None:
-    print()
-    print('#' * 30)
-    print('Converting Composer checkpoint to HuggingFace checkpoint format...')
-
-    _, _, local_folder_path = parse_uri(args.hf_output_path)
-
-    config = write_huggingface_pretrained_from_composer_checkpoint(
-        checkpoint_path=args.composer_path,
-        output_path=local_folder_path,
-        trust_remote_code=args.trust_remote_code,
-        output_precision=args.output_precision,
-        local_checkpoint_save_location=args.local_checkpoint_save_location,
-        bert_config_path=args.bert_config_path)
-
-
-    dtype = {
-        'fp32': torch.float32,
-        'fp16': torch.float16,
-        'bf16': torch.bfloat16,
-    }[args.output_precision]
-
-    print(f'Loading model from {local_folder_path}')
-
-    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/bert_layers.py", f"{local_folder_path}/bert_layers.py")
-    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/bert_padding.py", f"{local_folder_path}/bert_padding.py")
-    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/configuration_bert.py", f"{local_folder_path}/configuration_bert.py")
-    download_file("https://huggingface.co/mosaicml/mosaic-bert-base/raw/main/flash_attn_triton.py", f"{local_folder_path}/flash_attn_triton.py")
-
-
-    config = transformers.BertConfig.from_pretrained(local_folder_path)
-    loaded_hf_model = AutoModelForMaskedLM.from_pretrained(local_folder_path,config=config,trust_remote_code=True)
-
-
-    loaded_hf_model.save_pretrained(local_folder_path)
-
-    # Only need to edit files for MPT because it has custom code
-    if config.model_type == 'mpt':
-        print('Editing files for HF compatibility...')
-        edit_files_for_hf_compatibility(local_folder_path)
-
-    if args.hf_repo_for_upload is not None:
-        from huggingface_hub import HfApi
-        api = HfApi()
-
-        print(
-            f'Uploading {args.hf_output_path} to HuggingFace Hub at {args.hf_repo_for_upload}'
-        )
-        api.create_repo(repo_id=args.hf_repo_for_upload,
-                        use_auth_token=True,
-                        repo_type='model',
-                        private=True,
-                        exist_ok=True)
-        print('Repo created.')
-
-        # ignore the full checkpoint file if we now have sharded checkpoint files
-        ignore_patterns = []
-        if any(
-                f.startswith('pytorch_model-00001')
-                for f in os.listdir(args.hf_output_path)):
-            ignore_patterns.append('pytorch_model.bin')
-
-        api.upload_folder(folder_path=args.hf_output_path,
-                          repo_id=args.hf_repo_for_upload,
-                          use_auth_token=True,
-                          repo_type='model',
-                          ignore_patterns=ignore_patterns)
-        print('Folder uploaded.')
-
-        if args.test_uploaded_model:
-            print('Testing uploaded model...')
-            hub_config = transformers.BertConfig.from_pretrained(args.hf_repo_for_upload)
-            hub_model = AutoModelForMaskedLM.from_pretrained(args.hf_repo_for_upload,config=hub_config,trust_remote_code=True)
-
-
-            assert sum(p.numel() for p in hub_model.parameters()) == sum(
-                p.numel() for p in loaded_hf_model.parameters())
-            assert all(
-                str(type(module1)).split('.')[-2:] == str(type(module2)).split(
-                    '.')[-2:] for module1, module2 in zip(
-                        hub_model.modules(), loaded_hf_model.modules()))
-
-            assert next(
-                hub_model.parameters()
-            ).dtype == dtype, f'Expected model dtype to be {dtype}, but got {next(hub_model.parameters()).dtype}'
-            
-    print(
-        'Composer checkpoint successfully converted to HuggingFace checkpoint format.'
-    )
-
-
-def convert_composer_to_hf(args: Namespace) -> None:
-
-    try:
-        _convert_composer_to_hf(args)
-    except Exception as e:
-        raise e
-
-if __name__ == '__main__':
-    convert_composer_to_hf(parse_args())
diff --git a/scripts/inference/convert_mosaicbert_to_hf.py b/scripts/inference/convert_mosaicbert_to_hf.py
index 04cd9f2b8b..d693163399 100644
--- a/scripts/inference/convert_mosaicbert_to_hf.py
+++ b/scripts/inference/convert_mosaicbert_to_hf.py
@@ -82,11 +82,11 @@ def write_huggingface_pretrained_from_composer_checkpoint(
         #json
         with open(bert_config_path, 'r') as f:
             bert_config = json.load(f)   
-        composer_state_dict["state"]["integrations"]["huggingface"]["model"]["config"]["content"] = bert_config
+        composer_state_dict["state"]["integrations"] = {"huggingface":{"model":{"config":{"content": bert_config}}}}
 
     else:
         # placeholder config from mosaicml/mosaic-bert-base
-        composer_state_dict["state"]["integrations"]["huggingface"] = {
+        composer_state_dict["state"]["integrations"]={"huggingface":{
             "model":{"config":{"content":{
                 "_name_or_path": "mosaicml/mosaic-bert",
                 "alibi_starting_size": 512,
@@ -118,7 +118,7 @@ def write_huggingface_pretrained_from_composer_checkpoint(
                 "use_cache": False,
                 "vocab_size": 30522
 
-        }}},
+        }}}},
         "tokenizer":{}
     }
     if 'state' not in composer_state_dict:

From 11e959af2f97048c8639a65a20cf1b01175bebfd Mon Sep 17 00:00:00 2001
From: Haz Sameen Shahgir <83033987+Patchwork53@users.noreply.github.com>
Date: Sat, 13 Apr 2024 17:59:03 +0000
Subject: [PATCH 4/5] Document bert_config_path argument in conversion docstring

---
 scripts/inference/convert_mosaicbert_to_hf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/inference/convert_mosaicbert_to_hf.py b/scripts/inference/convert_mosaicbert_to_hf.py
index d693163399..9741870d55 100644
--- a/scripts/inference/convert_mosaicbert_to_hf.py
+++ b/scripts/inference/convert_mosaicbert_to_hf.py
@@ -52,6 +52,7 @@ def write_huggingface_pretrained_from_composer_checkpoint(
         local_checkpoint_save_location (Optional[Union[Path, str]], optional): If specified, where to save the checkpoint file to locally.
                                                                                 If the input ``checkpoint_path`` is already a local path, this will be a symlink.
                                                                                 Defaults to None, which will use a temporary file.
+        bert_config_path (Optional[str], optional): Path to the bert config file. Defaults to None. A placeholder config from mosaicml/mosaic-bert-base will be used if not provided.
     """
     dtype = {
         'fp32': torch.float32,

From 4ee91f40a1af88994c3377f1f5f9251e1f4c4af5 Mon Sep 17 00:00:00 2001
From: Haz Sameen Shahgir <83033987+Patchwork53@users.noreply.github.com>
Date: Sun, 21 Apr 2024 11:18:27 +0600
Subject: [PATCH 5/5] Save converted MosaicBERT model with safe_serialization=False

---
 scripts/inference/convert_mosaicbert_to_hf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/inference/convert_mosaicbert_to_hf.py b/scripts/inference/convert_mosaicbert_to_hf.py
index 9741870d55..d34b9969a2 100644
--- a/scripts/inference/convert_mosaicbert_to_hf.py
+++ b/scripts/inference/convert_mosaicbert_to_hf.py
@@ -219,7 +219,7 @@ def _convert_composer_to_hf(args: Namespace) -> None:
     loaded_hf_model = AutoModelForMaskedLM.from_pretrained(local_folder_path,config=config,trust_remote_code=True)
 
 
-    loaded_hf_model.save_pretrained(local_folder_path)
+    loaded_hf_model.save_pretrained(local_folder_path, safe_serialization=False)
 
     # Only need to edit files for MPT because it has custom code
     if config.model_type == 'mpt':