From 195ef79a6ca36a37d068cb4ad14d732a5edcda12 Mon Sep 17 00:00:00 2001
From: pass_lin <935499957@qq.com>
Date: Sun, 28 Sep 2025 22:48:17 +0800
Subject: [PATCH 01/10] add RWKV

---
 keras_hub/api/__init__.py                     |  16 +-
 keras_hub/api/layers/__init__.py              | 144 ++--
 keras_hub/api/metrics/__init__.py             |  10 +-
 keras_hub/api/models/__init__.py              | 742 +++++++-----------
 keras_hub/api/samplers/__init__.py            |  22 +-
 keras_hub/api/tokenizers/__init__.py          | 143 ++--
 keras_hub/api/utils/__init__.py               |  18 +-
 keras_hub/src/models/rwkv7/rwkv7_backbone.py  | 119 +++
 keras_hub/src/models/rwkv7/rwkv7_causal_lm.py |  50 ++
 .../rwkv7/rwkv7_causal_lm_preprocessor.py     |  88 +++
 keras_hub/src/models/rwkv7/rwkv7_layer.py     | 612 +++++++++++++++
 keras_hub/src/models/rwkv7/rwkv7_tokenizer.py | 224 ++++++
 .../convert_rwkv7_checkpoints.py              | 464 +++++++++++
 13 files changed, 1946 insertions(+), 706 deletions(-)
 create mode 100644 keras_hub/src/models/rwkv7/rwkv7_backbone.py
 create mode 100644 keras_hub/src/models/rwkv7/rwkv7_causal_lm.py
 create mode 100644 keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py
 create mode 100644 keras_hub/src/models/rwkv7/rwkv7_layer.py
 create mode 100644 keras_hub/src/models/rwkv7/rwkv7_tokenizer.py
 create mode 100644 tools/checkpoint_conversion/convert_rwkv7_checkpoints.py

diff --git a/keras_hub/api/__init__.py b/keras_hub/api/__init__.py
index 2aa98bf3f9..3796e4c7f4 100644
--- a/keras_hub/api/__init__.py
+++ b/keras_hub/api/__init__.py
@@ -4,12 +4,12 @@
 since your modifications would be overwritten.
 """
 
-from keras_hub import layers as layers
-from keras_hub import metrics as metrics
-from keras_hub import models as models
-from keras_hub import samplers as samplers
-from keras_hub import tokenizers as tokenizers
-from keras_hub import utils as utils
-from keras_hub.src.utils.preset_utils import upload_preset as upload_preset
+from keras_hub import layers
+from keras_hub import metrics
+from keras_hub import models
+from keras_hub import samplers
+from keras_hub import tokenizers
+from keras_hub import utils
+from keras_hub.src.utils.preset_utils import upload_preset
 from keras_hub.src.version import __version__ as __version__
-from keras_hub.src.version import version as version
+from keras_hub.src.version import version
diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py
index 4550cf8689..c4411ba889 100644
--- a/keras_hub/api/layers/__init__.py
+++ b/keras_hub/api/layers/__init__.py
@@ -4,149 +4,105 @@
 since your modifications would be overwritten.
""" -from keras_hub.src.layers.modeling.alibi_bias import AlibiBias as AlibiBias -from keras_hub.src.layers.modeling.anchor_generator import ( - AnchorGenerator as AnchorGenerator, -) -from keras_hub.src.layers.modeling.box_matcher import BoxMatcher as BoxMatcher +from keras_hub.src.layers.modeling.alibi_bias import AlibiBias +from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator +from keras_hub.src.layers.modeling.box_matcher import BoxMatcher from keras_hub.src.layers.modeling.cached_multi_head_attention import ( - CachedMultiHeadAttention as CachedMultiHeadAttention, -) -from keras_hub.src.layers.modeling.f_net_encoder import ( - FNetEncoder as FNetEncoder, -) -from keras_hub.src.layers.modeling.masked_lm_head import ( - MaskedLMHead as MaskedLMHead, -) -from keras_hub.src.layers.modeling.non_max_supression import ( - NonMaxSuppression as NonMaxSuppression, -) -from keras_hub.src.layers.modeling.position_embedding import ( - PositionEmbedding as PositionEmbedding, + CachedMultiHeadAttention, ) +from keras_hub.src.layers.modeling.f_net_encoder import FNetEncoder +from keras_hub.src.layers.modeling.masked_lm_head import MaskedLMHead +from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression +from keras_hub.src.layers.modeling.position_embedding import PositionEmbedding from keras_hub.src.layers.modeling.reversible_embedding import ( - ReversibleEmbedding as ReversibleEmbedding, -) -from keras_hub.src.layers.modeling.rms_normalization import ( - RMSNormalization as RMSNormalization, -) -from keras_hub.src.layers.modeling.rotary_embedding import ( - RotaryEmbedding as RotaryEmbedding, + ReversibleEmbedding, ) +from keras_hub.src.layers.modeling.rms_normalization import RMSNormalization +from keras_hub.src.layers.modeling.rotary_embedding import RotaryEmbedding from keras_hub.src.layers.modeling.sine_position_encoding import ( - SinePositionEncoding as SinePositionEncoding, + SinePositionEncoding, ) from keras_hub.src.layers.modeling.token_and_position_embedding import ( - TokenAndPositionEmbedding as TokenAndPositionEmbedding, -) -from keras_hub.src.layers.modeling.transformer_decoder import ( - TransformerDecoder as TransformerDecoder, -) -from keras_hub.src.layers.modeling.transformer_encoder import ( - TransformerEncoder as TransformerEncoder, -) -from keras_hub.src.layers.preprocessing.audio_converter import ( - AudioConverter as AudioConverter, -) -from keras_hub.src.layers.preprocessing.image_converter import ( - ImageConverter as ImageConverter, + TokenAndPositionEmbedding, ) +from keras_hub.src.layers.modeling.transformer_decoder import TransformerDecoder +from keras_hub.src.layers.modeling.transformer_encoder import TransformerEncoder +from keras_hub.src.layers.preprocessing.audio_converter import AudioConverter +from keras_hub.src.layers.preprocessing.image_converter import ImageConverter from keras_hub.src.layers.preprocessing.masked_lm_mask_generator import ( - MaskedLMMaskGenerator as MaskedLMMaskGenerator, + MaskedLMMaskGenerator, ) from keras_hub.src.layers.preprocessing.multi_segment_packer import ( - MultiSegmentPacker as MultiSegmentPacker, -) -from keras_hub.src.layers.preprocessing.random_deletion import ( - RandomDeletion as RandomDeletion, -) -from keras_hub.src.layers.preprocessing.random_swap import ( - RandomSwap as RandomSwap, -) -from keras_hub.src.layers.preprocessing.start_end_packer import ( - StartEndPacker as StartEndPacker, + MultiSegmentPacker, ) +from keras_hub.src.layers.preprocessing.random_deletion import 
RandomDeletion +from keras_hub.src.layers.preprocessing.random_swap import RandomSwap +from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker from keras_hub.src.models.basnet.basnet_image_converter import ( - BASNetImageConverter as BASNetImageConverter, -) -from keras_hub.src.models.clip.clip_image_converter import ( - CLIPImageConverter as CLIPImageConverter, + BASNetImageConverter, ) +from keras_hub.src.models.clip.clip_image_converter import CLIPImageConverter from keras_hub.src.models.cspnet.cspnet_image_converter import ( - CSPNetImageConverter as CSPNetImageConverter, + CSPNetImageConverter, ) from keras_hub.src.models.d_fine.d_fine_image_converter import ( - DFineImageConverter as DFineImageConverter, + DFineImageConverter, ) from keras_hub.src.models.deeplab_v3.deeplab_v3_image_converter import ( - DeepLabV3ImageConverter as DeepLabV3ImageConverter, -) -from keras_hub.src.models.deit.deit_image_converter import ( - DeiTImageConverter as DeiTImageConverter, + DeepLabV3ImageConverter, ) +from keras_hub.src.models.deit.deit_image_converter import DeiTImageConverter from keras_hub.src.models.densenet.densenet_image_converter import ( - DenseNetImageConverter as DenseNetImageConverter, + DenseNetImageConverter, ) from keras_hub.src.models.depth_anything.depth_anything_image_converter import ( - DepthAnythingImageConverter as DepthAnythingImageConverter, + DepthAnythingImageConverter, ) from keras_hub.src.models.dinov2.dinov2_image_converter import ( - DINOV2ImageConverter as DINOV2ImageConverter, + DINOV2ImageConverter, ) from keras_hub.src.models.efficientnet.efficientnet_image_converter import ( - EfficientNetImageConverter as EfficientNetImageConverter, + EfficientNetImageConverter, ) from keras_hub.src.models.gemma3.gemma3_image_converter import ( - Gemma3ImageConverter as Gemma3ImageConverter, + Gemma3ImageConverter, ) from keras_hub.src.models.hgnetv2.hgnetv2_image_converter import ( - HGNetV2ImageConverter as HGNetV2ImageConverter, -) -from keras_hub.src.models.mit.mit_image_converter import ( - MiTImageConverter as MiTImageConverter, + HGNetV2ImageConverter, ) +from keras_hub.src.models.mit.mit_image_converter import MiTImageConverter from keras_hub.src.models.mobilenet.mobilenet_image_converter import ( - MobileNetImageConverter as MobileNetImageConverter, + MobileNetImageConverter, ) from keras_hub.src.models.moonshine.moonshine_audio_converter import ( - MoonshineAudioConverter as MoonshineAudioConverter, + MoonshineAudioConverter, ) from keras_hub.src.models.pali_gemma.pali_gemma_image_converter import ( - PaliGemmaImageConverter as PaliGemmaImageConverter, + PaliGemmaImageConverter, ) from keras_hub.src.models.parseq.parseq_image_converter import ( - PARSeqImageConverter as PARSeqImageConverter, + PARSeqImageConverter, ) from keras_hub.src.models.resnet.resnet_image_converter import ( - ResNetImageConverter as ResNetImageConverter, + ResNetImageConverter, ) from keras_hub.src.models.retinanet.retinanet_image_converter import ( - RetinaNetImageConverter as RetinaNetImageConverter, -) -from keras_hub.src.models.sam.sam_image_converter import ( - SAMImageConverter as SAMImageConverter, -) -from keras_hub.src.models.sam.sam_mask_decoder import ( - SAMMaskDecoder as SAMMaskDecoder, -) -from keras_hub.src.models.sam.sam_prompt_encoder import ( - SAMPromptEncoder as SAMPromptEncoder, + RetinaNetImageConverter, ) +from keras_hub.src.models.sam.sam_image_converter import SAMImageConverter +from keras_hub.src.models.sam.sam_mask_decoder import SAMMaskDecoder 
+from keras_hub.src.models.sam.sam_prompt_encoder import SAMPromptEncoder from keras_hub.src.models.segformer.segformer_image_converter import ( - SegFormerImageConverter as SegFormerImageConverter, + SegFormerImageConverter, ) from keras_hub.src.models.siglip.siglip_image_converter import ( - SigLIPImageConverter as SigLIPImageConverter, -) -from keras_hub.src.models.vgg.vgg_image_converter import ( - VGGImageConverter as VGGImageConverter, -) -from keras_hub.src.models.vit.vit_image_converter import ( - ViTImageConverter as ViTImageConverter, + SigLIPImageConverter, ) +from keras_hub.src.models.vgg.vgg_image_converter import VGGImageConverter +from keras_hub.src.models.vit.vit_image_converter import ViTImageConverter from keras_hub.src.models.whisper.whisper_audio_converter import ( - WhisperAudioConverter as WhisperAudioConverter, + WhisperAudioConverter, ) from keras_hub.src.models.xception.xception_image_converter import ( - XceptionImageConverter as XceptionImageConverter, + XceptionImageConverter, ) diff --git a/keras_hub/api/metrics/__init__.py b/keras_hub/api/metrics/__init__.py index 100c2c66fb..88a0a7df2b 100644 --- a/keras_hub/api/metrics/__init__.py +++ b/keras_hub/api/metrics/__init__.py @@ -4,8 +4,8 @@ since your modifications would be overwritten. """ -from keras_hub.src.metrics.bleu import Bleu as Bleu -from keras_hub.src.metrics.edit_distance import EditDistance as EditDistance -from keras_hub.src.metrics.perplexity import Perplexity as Perplexity -from keras_hub.src.metrics.rouge_l import RougeL as RougeL -from keras_hub.src.metrics.rouge_n import RougeN as RougeN +from keras_hub.src.metrics.bleu import Bleu +from keras_hub.src.metrics.edit_distance import EditDistance +from keras_hub.src.metrics.perplexity import Perplexity +from keras_hub.src.metrics.rouge_l import RougeL +from keras_hub.src.metrics.rouge_n import RougeN diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 308321717c..403bfb65ad 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -4,743 +4,535 @@ since your modifications would be overwritten. 
""" -from keras_hub.src.models.albert.albert_backbone import ( - AlbertBackbone as AlbertBackbone, -) -from keras_hub.src.models.albert.albert_masked_lm import ( - AlbertMaskedLM as AlbertMaskedLM, -) +from keras_hub.src.models.albert.albert_backbone import AlbertBackbone +from keras_hub.src.models.albert.albert_masked_lm import AlbertMaskedLM from keras_hub.src.models.albert.albert_masked_lm_preprocessor import ( - AlbertMaskedLMPreprocessor as AlbertMaskedLMPreprocessor, + AlbertMaskedLMPreprocessor, ) from keras_hub.src.models.albert.albert_text_classifier import ( - AlbertTextClassifier as AlbertClassifier, + AlbertTextClassifier, ) from keras_hub.src.models.albert.albert_text_classifier import ( - AlbertTextClassifier as AlbertTextClassifier, + AlbertTextClassifier as AlbertClassifier, ) from keras_hub.src.models.albert.albert_text_classifier_preprocessor import ( - AlbertTextClassifierPreprocessor as AlbertPreprocessor, + AlbertTextClassifierPreprocessor, ) from keras_hub.src.models.albert.albert_text_classifier_preprocessor import ( - AlbertTextClassifierPreprocessor as AlbertTextClassifierPreprocessor, -) -from keras_hub.src.models.albert.albert_tokenizer import ( - AlbertTokenizer as AlbertTokenizer, -) -from keras_hub.src.models.backbone import Backbone as Backbone -from keras_hub.src.models.bart.bart_backbone import BartBackbone as BartBackbone -from keras_hub.src.models.bart.bart_seq_2_seq_lm import ( - BartSeq2SeqLM as BartSeq2SeqLM, + AlbertTextClassifierPreprocessor as AlbertPreprocessor, ) +from keras_hub.src.models.albert.albert_tokenizer import AlbertTokenizer +from keras_hub.src.models.backbone import Backbone +from keras_hub.src.models.bart.bart_backbone import BartBackbone +from keras_hub.src.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM from keras_hub.src.models.bart.bart_seq_2_seq_lm_preprocessor import ( - BartSeq2SeqLMPreprocessor as BartSeq2SeqLMPreprocessor, -) -from keras_hub.src.models.bart.bart_tokenizer import ( - BartTokenizer as BartTokenizer, -) -from keras_hub.src.models.basnet.basnet import ( - BASNetImageSegmenter as BASNetImageSegmenter, -) -from keras_hub.src.models.basnet.basnet_backbone import ( - BASNetBackbone as BASNetBackbone, -) -from keras_hub.src.models.basnet.basnet_preprocessor import ( - BASNetPreprocessor as BASNetPreprocessor, -) -from keras_hub.src.models.bert.bert_backbone import BertBackbone as BertBackbone -from keras_hub.src.models.bert.bert_masked_lm import ( - BertMaskedLM as BertMaskedLM, -) + BartSeq2SeqLMPreprocessor, +) +from keras_hub.src.models.bart.bart_tokenizer import BartTokenizer +from keras_hub.src.models.basnet.basnet import BASNetImageSegmenter +from keras_hub.src.models.basnet.basnet_backbone import BASNetBackbone +from keras_hub.src.models.basnet.basnet_preprocessor import BASNetPreprocessor +from keras_hub.src.models.bert.bert_backbone import BertBackbone +from keras_hub.src.models.bert.bert_masked_lm import BertMaskedLM from keras_hub.src.models.bert.bert_masked_lm_preprocessor import ( - BertMaskedLMPreprocessor as BertMaskedLMPreprocessor, + BertMaskedLMPreprocessor, ) +from keras_hub.src.models.bert.bert_text_classifier import BertTextClassifier from keras_hub.src.models.bert.bert_text_classifier import ( BertTextClassifier as BertClassifier, ) -from keras_hub.src.models.bert.bert_text_classifier import ( - BertTextClassifier as BertTextClassifier, -) from keras_hub.src.models.bert.bert_text_classifier_preprocessor import ( - BertTextClassifierPreprocessor as BertPreprocessor, + 
BertTextClassifierPreprocessor, ) from keras_hub.src.models.bert.bert_text_classifier_preprocessor import ( - BertTextClassifierPreprocessor as BertTextClassifierPreprocessor, -) -from keras_hub.src.models.bert.bert_tokenizer import ( - BertTokenizer as BertTokenizer, -) -from keras_hub.src.models.bloom.bloom_backbone import ( - BloomBackbone as BloomBackbone, -) -from keras_hub.src.models.bloom.bloom_causal_lm import ( - BloomCausalLM as BloomCausalLM, + BertTextClassifierPreprocessor as BertPreprocessor, ) +from keras_hub.src.models.bert.bert_tokenizer import BertTokenizer +from keras_hub.src.models.bloom.bloom_backbone import BloomBackbone +from keras_hub.src.models.bloom.bloom_causal_lm import BloomCausalLM from keras_hub.src.models.bloom.bloom_causal_lm_preprocessor import ( - BloomCausalLMPreprocessor as BloomCausalLMPreprocessor, -) -from keras_hub.src.models.bloom.bloom_tokenizer import ( - BloomTokenizer as BloomTokenizer, -) -from keras_hub.src.models.causal_lm import CausalLM as CausalLM -from keras_hub.src.models.causal_lm_preprocessor import ( - CausalLMPreprocessor as CausalLMPreprocessor, -) -from keras_hub.src.models.clip.clip_backbone import CLIPBackbone as CLIPBackbone -from keras_hub.src.models.clip.clip_preprocessor import ( - CLIPPreprocessor as CLIPPreprocessor, -) -from keras_hub.src.models.clip.clip_text_encoder import ( - CLIPTextEncoder as CLIPTextEncoder, -) -from keras_hub.src.models.clip.clip_tokenizer import ( - CLIPTokenizer as CLIPTokenizer, -) -from keras_hub.src.models.clip.clip_vision_encoder import ( - CLIPVisionEncoder as CLIPVisionEncoder, -) -from keras_hub.src.models.cspnet.cspnet_backbone import ( - CSPNetBackbone as CSPNetBackbone, -) + BloomCausalLMPreprocessor, +) +from keras_hub.src.models.bloom.bloom_tokenizer import BloomTokenizer +from keras_hub.src.models.causal_lm import CausalLM +from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor +from keras_hub.src.models.clip.clip_backbone import CLIPBackbone +from keras_hub.src.models.clip.clip_preprocessor import CLIPPreprocessor +from keras_hub.src.models.clip.clip_text_encoder import CLIPTextEncoder +from keras_hub.src.models.clip.clip_tokenizer import CLIPTokenizer +from keras_hub.src.models.clip.clip_vision_encoder import CLIPVisionEncoder +from keras_hub.src.models.cspnet.cspnet_backbone import CSPNetBackbone from keras_hub.src.models.cspnet.cspnet_image_classifier import ( - CSPNetImageClassifier as CSPNetImageClassifier, + CSPNetImageClassifier, ) from keras_hub.src.models.cspnet.cspnet_image_classifier_preprocessor import ( - CSPNetImageClassifierPreprocessor as CSPNetImageClassifierPreprocessor, -) -from keras_hub.src.models.d_fine.d_fine_backbone import ( - DFineBackbone as DFineBackbone, + CSPNetImageClassifierPreprocessor, ) +from keras_hub.src.models.d_fine.d_fine_backbone import DFineBackbone from keras_hub.src.models.d_fine.d_fine_object_detector import ( - DFineObjectDetector as DFineObjectDetector, + DFineObjectDetector, ) from keras_hub.src.models.d_fine.d_fine_object_detector_preprocessor import ( - DFineObjectDetectorPreprocessor as DFineObjectDetectorPreprocessor, + DFineObjectDetectorPreprocessor, ) from keras_hub.src.models.deberta_v3.deberta_v3_backbone import ( - DebertaV3Backbone as DebertaV3Backbone, + DebertaV3Backbone, ) from keras_hub.src.models.deberta_v3.deberta_v3_masked_lm import ( - DebertaV3MaskedLM as DebertaV3MaskedLM, + DebertaV3MaskedLM, ) from keras_hub.src.models.deberta_v3.deberta_v3_masked_lm_preprocessor import ( - 
DebertaV3MaskedLMPreprocessor as DebertaV3MaskedLMPreprocessor, + DebertaV3MaskedLMPreprocessor, ) from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier import ( - DebertaV3TextClassifier as DebertaV3Classifier, + DebertaV3TextClassifier, ) from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier import ( - DebertaV3TextClassifier as DebertaV3TextClassifier, + DebertaV3TextClassifier as DebertaV3Classifier, ) from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import ( - DebertaV3TextClassifierPreprocessor as DebertaV3Preprocessor, + DebertaV3TextClassifierPreprocessor, ) from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import ( - DebertaV3TextClassifierPreprocessor as DebertaV3TextClassifierPreprocessor, + DebertaV3TextClassifierPreprocessor as DebertaV3Preprocessor, ) from keras_hub.src.models.deberta_v3.deberta_v3_tokenizer import ( - DebertaV3Tokenizer as DebertaV3Tokenizer, + DebertaV3Tokenizer, ) from keras_hub.src.models.deeplab_v3.deeplab_v3_backbone import ( - DeepLabV3Backbone as DeepLabV3Backbone, + DeepLabV3Backbone, ) from keras_hub.src.models.deeplab_v3.deeplab_v3_image_segmeter_preprocessor import ( - DeepLabV3ImageSegmenterPreprocessor as DeepLabV3ImageSegmenterPreprocessor, + DeepLabV3ImageSegmenterPreprocessor, ) from keras_hub.src.models.deeplab_v3.deeplab_v3_segmenter import ( - DeepLabV3ImageSegmenter as DeepLabV3ImageSegmenter, -) -from keras_hub.src.models.deit.deit_backbone import DeiTBackbone as DeiTBackbone -from keras_hub.src.models.deit.deit_image_classifier import ( - DeiTImageClassifier as DeiTImageClassifier, + DeepLabV3ImageSegmenter, ) +from keras_hub.src.models.deit.deit_backbone import DeiTBackbone +from keras_hub.src.models.deit.deit_image_classifier import DeiTImageClassifier from keras_hub.src.models.deit.deit_image_classifier_preprocessor import ( - DeiTImageClassifierPreprocessor as DeiTImageClassifierPreprocessor, -) -from keras_hub.src.models.densenet.densenet_backbone import ( - DenseNetBackbone as DenseNetBackbone, + DeiTImageClassifierPreprocessor, ) +from keras_hub.src.models.densenet.densenet_backbone import DenseNetBackbone from keras_hub.src.models.densenet.densenet_image_classifier import ( - DenseNetImageClassifier as DenseNetImageClassifier, + DenseNetImageClassifier, ) from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import ( - DenseNetImageClassifierPreprocessor as DenseNetImageClassifierPreprocessor, + DenseNetImageClassifierPreprocessor, ) from keras_hub.src.models.depth_anything.depth_anything_backbone import ( - DepthAnythingBackbone as DepthAnythingBackbone, + DepthAnythingBackbone, ) from keras_hub.src.models.depth_anything.depth_anything_depth_estimator import ( - DepthAnythingDepthEstimator as DepthAnythingDepthEstimator, + DepthAnythingDepthEstimator, ) from keras_hub.src.models.depth_anything.depth_anything_depth_estimator_preprocessor import ( - DepthAnythingDepthEstimatorPreprocessor as DepthAnythingDepthEstimatorPreprocessor, -) -from keras_hub.src.models.depth_estimator import ( - DepthEstimator as DepthEstimator, + DepthAnythingDepthEstimatorPreprocessor, ) +from keras_hub.src.models.depth_estimator import DepthEstimator from keras_hub.src.models.depth_estimator_preprocessor import ( - DepthEstimatorPreprocessor as DepthEstimatorPreprocessor, -) -from keras_hub.src.models.dinov2.dinov2_backbone import ( - DINOV2Backbone as DINOV2Backbone, + DepthEstimatorPreprocessor, ) +from keras_hub.src.models.dinov2.dinov2_backbone import 
DINOV2Backbone from keras_hub.src.models.distil_bert.distil_bert_backbone import ( - DistilBertBackbone as DistilBertBackbone, + DistilBertBackbone, ) from keras_hub.src.models.distil_bert.distil_bert_masked_lm import ( - DistilBertMaskedLM as DistilBertMaskedLM, + DistilBertMaskedLM, ) from keras_hub.src.models.distil_bert.distil_bert_masked_lm_preprocessor import ( - DistilBertMaskedLMPreprocessor as DistilBertMaskedLMPreprocessor, + DistilBertMaskedLMPreprocessor, ) from keras_hub.src.models.distil_bert.distil_bert_text_classifier import ( - DistilBertTextClassifier as DistilBertClassifier, + DistilBertTextClassifier, ) from keras_hub.src.models.distil_bert.distil_bert_text_classifier import ( - DistilBertTextClassifier as DistilBertTextClassifier, + DistilBertTextClassifier as DistilBertClassifier, ) from keras_hub.src.models.distil_bert.distil_bert_text_classifier_preprocessor import ( - DistilBertTextClassifierPreprocessor as DistilBertPreprocessor, + DistilBertTextClassifierPreprocessor, ) from keras_hub.src.models.distil_bert.distil_bert_text_classifier_preprocessor import ( - DistilBertTextClassifierPreprocessor as DistilBertTextClassifierPreprocessor, + DistilBertTextClassifierPreprocessor as DistilBertPreprocessor, ) from keras_hub.src.models.distil_bert.distil_bert_tokenizer import ( - DistilBertTokenizer as DistilBertTokenizer, + DistilBertTokenizer, ) from keras_hub.src.models.efficientnet.efficientnet_backbone import ( - EfficientNetBackbone as EfficientNetBackbone, + EfficientNetBackbone, ) from keras_hub.src.models.efficientnet.efficientnet_image_classifier import ( - EfficientNetImageClassifier as EfficientNetImageClassifier, + EfficientNetImageClassifier, ) from keras_hub.src.models.efficientnet.efficientnet_image_classifier_preprocessor import ( - EfficientNetImageClassifierPreprocessor as EfficientNetImageClassifierPreprocessor, -) -from keras_hub.src.models.electra.electra_backbone import ( - ElectraBackbone as ElectraBackbone, -) -from keras_hub.src.models.electra.electra_tokenizer import ( - ElectraTokenizer as ElectraTokenizer, + EfficientNetImageClassifierPreprocessor, ) +from keras_hub.src.models.electra.electra_backbone import ElectraBackbone +from keras_hub.src.models.electra.electra_tokenizer import ElectraTokenizer +from keras_hub.src.models.esm.esm_backbone import ESMBackbone from keras_hub.src.models.esm.esm_backbone import ESMBackbone as ESM2Backbone -from keras_hub.src.models.esm.esm_backbone import ESMBackbone as ESMBackbone -from keras_hub.src.models.esm.esm_classifier import ( - ESMProteinClassifier as ESMProteinClassifier, -) +from keras_hub.src.models.esm.esm_classifier import ESMProteinClassifier from keras_hub.src.models.esm.esm_classifier_preprocessor import ( - ESMProteinClassifierPreprocessor as ESMProteinClassifierPreprocessor, + ESMProteinClassifierPreprocessor, ) +from keras_hub.src.models.esm.esm_masked_plm import ESMMaskedPLM from keras_hub.src.models.esm.esm_masked_plm import ( ESMMaskedPLM as ESM2MaskedPLM, ) -from keras_hub.src.models.esm.esm_masked_plm import ESMMaskedPLM as ESMMaskedPLM from keras_hub.src.models.esm.esm_masked_plm_preprocessor import ( - ESMMaskedPLMPreprocessor as ESMMaskedPLMPreprocessor, -) -from keras_hub.src.models.esm.esm_tokenizer import ESMTokenizer as ESMTokenizer -from keras_hub.src.models.f_net.f_net_backbone import ( - FNetBackbone as FNetBackbone, -) -from keras_hub.src.models.f_net.f_net_masked_lm import ( - FNetMaskedLM as FNetMaskedLM, + ESMMaskedPLMPreprocessor, ) +from 
keras_hub.src.models.esm.esm_tokenizer import ESMTokenizer +from keras_hub.src.models.f_net.f_net_backbone import FNetBackbone +from keras_hub.src.models.f_net.f_net_masked_lm import FNetMaskedLM from keras_hub.src.models.f_net.f_net_masked_lm_preprocessor import ( - FNetMaskedLMPreprocessor as FNetMaskedLMPreprocessor, + FNetMaskedLMPreprocessor, ) +from keras_hub.src.models.f_net.f_net_text_classifier import FNetTextClassifier from keras_hub.src.models.f_net.f_net_text_classifier import ( FNetTextClassifier as FNetClassifier, ) -from keras_hub.src.models.f_net.f_net_text_classifier import ( - FNetTextClassifier as FNetTextClassifier, -) from keras_hub.src.models.f_net.f_net_text_classifier_preprocessor import ( - FNetTextClassifierPreprocessor as FNetPreprocessor, + FNetTextClassifierPreprocessor, ) from keras_hub.src.models.f_net.f_net_text_classifier_preprocessor import ( - FNetTextClassifierPreprocessor as FNetTextClassifierPreprocessor, -) -from keras_hub.src.models.f_net.f_net_tokenizer import ( - FNetTokenizer as FNetTokenizer, -) -from keras_hub.src.models.falcon.falcon_backbone import ( - FalconBackbone as FalconBackbone, -) -from keras_hub.src.models.falcon.falcon_causal_lm import ( - FalconCausalLM as FalconCausalLM, + FNetTextClassifierPreprocessor as FNetPreprocessor, ) +from keras_hub.src.models.f_net.f_net_tokenizer import FNetTokenizer +from keras_hub.src.models.falcon.falcon_backbone import FalconBackbone +from keras_hub.src.models.falcon.falcon_causal_lm import FalconCausalLM from keras_hub.src.models.falcon.falcon_causal_lm_preprocessor import ( - FalconCausalLMPreprocessor as FalconCausalLMPreprocessor, -) -from keras_hub.src.models.falcon.falcon_tokenizer import ( - FalconTokenizer as FalconTokenizer, -) -from keras_hub.src.models.feature_pyramid_backbone import ( - FeaturePyramidBackbone as FeaturePyramidBackbone, -) -from keras_hub.src.models.flux.flux_model import FluxBackbone as FluxBackbone -from keras_hub.src.models.flux.flux_text_to_image import ( - FluxTextToImage as FluxTextToImage, + FalconCausalLMPreprocessor, ) +from keras_hub.src.models.falcon.falcon_tokenizer import FalconTokenizer +from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone +from keras_hub.src.models.flux.flux_model import FluxBackbone +from keras_hub.src.models.flux.flux_text_to_image import FluxTextToImage from keras_hub.src.models.flux.flux_text_to_image_preprocessor import ( - FluxTextToImagePreprocessor as FluxTextToImagePreprocessor, -) -from keras_hub.src.models.gemma.gemma_backbone import ( - GemmaBackbone as GemmaBackbone, -) -from keras_hub.src.models.gemma.gemma_causal_lm import ( - GemmaCausalLM as GemmaCausalLM, + FluxTextToImagePreprocessor, ) +from keras_hub.src.models.gemma.gemma_backbone import GemmaBackbone +from keras_hub.src.models.gemma.gemma_causal_lm import GemmaCausalLM from keras_hub.src.models.gemma.gemma_causal_lm_preprocessor import ( - GemmaCausalLMPreprocessor as GemmaCausalLMPreprocessor, -) -from keras_hub.src.models.gemma.gemma_tokenizer import ( - GemmaTokenizer as GemmaTokenizer, -) -from keras_hub.src.models.gemma3.gemma3_backbone import ( - Gemma3Backbone as Gemma3Backbone, -) -from keras_hub.src.models.gemma3.gemma3_causal_lm import ( - Gemma3CausalLM as Gemma3CausalLM, + GemmaCausalLMPreprocessor, ) +from keras_hub.src.models.gemma.gemma_tokenizer import GemmaTokenizer +from keras_hub.src.models.gemma3.gemma3_backbone import Gemma3Backbone +from keras_hub.src.models.gemma3.gemma3_causal_lm import Gemma3CausalLM from 
keras_hub.src.models.gemma3.gemma3_causal_lm_preprocessor import ( - Gemma3CausalLMPreprocessor as Gemma3CausalLMPreprocessor, -) -from keras_hub.src.models.gemma3.gemma3_tokenizer import ( - Gemma3Tokenizer as Gemma3Tokenizer, + Gemma3CausalLMPreprocessor, ) +from keras_hub.src.models.gemma3.gemma3_tokenizer import Gemma3Tokenizer from keras_hub.src.models.gemma3.gemma3_vision_encoder import ( - Gemma3VisionEncoder as Gemma3VisionEncoder, -) -from keras_hub.src.models.gpt2.gpt2_backbone import GPT2Backbone as GPT2Backbone -from keras_hub.src.models.gpt2.gpt2_causal_lm import ( - GPT2CausalLM as GPT2CausalLM, + Gemma3VisionEncoder, ) +from keras_hub.src.models.gpt2.gpt2_backbone import GPT2Backbone +from keras_hub.src.models.gpt2.gpt2_causal_lm import GPT2CausalLM from keras_hub.src.models.gpt2.gpt2_causal_lm_preprocessor import ( - GPT2CausalLMPreprocessor as GPT2CausalLMPreprocessor, -) -from keras_hub.src.models.gpt2.gpt2_preprocessor import ( - GPT2Preprocessor as GPT2Preprocessor, -) -from keras_hub.src.models.gpt2.gpt2_tokenizer import ( - GPT2Tokenizer as GPT2Tokenizer, -) -from keras_hub.src.models.gpt_neo_x.gpt_neo_x_backbone import ( - GPTNeoXBackbone as GPTNeoXBackbone, -) -from keras_hub.src.models.gpt_neo_x.gpt_neo_x_causal_lm import ( - GPTNeoXCausalLM as GPTNeoXCausalLM, + GPT2CausalLMPreprocessor, ) +from keras_hub.src.models.gpt2.gpt2_preprocessor import GPT2Preprocessor +from keras_hub.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer +from keras_hub.src.models.gpt_neo_x.gpt_neo_x_backbone import GPTNeoXBackbone +from keras_hub.src.models.gpt_neo_x.gpt_neo_x_causal_lm import GPTNeoXCausalLM from keras_hub.src.models.gpt_neo_x.gpt_neo_x_causal_lm_preprocessor import ( - GPTNeoXCausalLMPreprocessor as GPTNeoXCausalLMPreprocessor, -) -from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import ( - GPTNeoXTokenizer as GPTNeoXTokenizer, -) -from keras_hub.src.models.hgnetv2.hgnetv2_backbone import ( - HGNetV2Backbone as HGNetV2Backbone, + GPTNeoXCausalLMPreprocessor, ) +from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer +from keras_hub.src.models.hgnetv2.hgnetv2_backbone import HGNetV2Backbone from keras_hub.src.models.hgnetv2.hgnetv2_image_classifier import ( - HGNetV2ImageClassifier as HGNetV2ImageClassifier, + HGNetV2ImageClassifier, ) from keras_hub.src.models.hgnetv2.hgnetv2_image_classifier_preprocessor import ( - HGNetV2ImageClassifierPreprocessor as HGNetV2ImageClassifierPreprocessor, -) -from keras_hub.src.models.image_classifier import ( - ImageClassifier as ImageClassifier, + HGNetV2ImageClassifierPreprocessor, ) +from keras_hub.src.models.image_classifier import ImageClassifier from keras_hub.src.models.image_classifier_preprocessor import ( - ImageClassifierPreprocessor as ImageClassifierPreprocessor, -) -from keras_hub.src.models.image_segmenter import ( - ImageSegmenter as ImageSegmenter, + ImageClassifierPreprocessor, ) +from keras_hub.src.models.image_segmenter import ImageSegmenter from keras_hub.src.models.image_segmenter_preprocessor import ( - ImageSegmenterPreprocessor as ImageSegmenterPreprocessor, -) -from keras_hub.src.models.image_to_image import ImageToImage as ImageToImage -from keras_hub.src.models.inpaint import Inpaint as Inpaint -from keras_hub.src.models.llama.llama_backbone import ( - LlamaBackbone as LlamaBackbone, -) -from keras_hub.src.models.llama.llama_causal_lm import ( - LlamaCausalLM as LlamaCausalLM, + ImageSegmenterPreprocessor, ) +from keras_hub.src.models.image_to_image import ImageToImage +from 
keras_hub.src.models.inpaint import Inpaint +from keras_hub.src.models.llama.llama_backbone import LlamaBackbone +from keras_hub.src.models.llama.llama_causal_lm import LlamaCausalLM from keras_hub.src.models.llama.llama_causal_lm_preprocessor import ( - LlamaCausalLMPreprocessor as LlamaCausalLMPreprocessor, -) -from keras_hub.src.models.llama.llama_tokenizer import ( - LlamaTokenizer as LlamaTokenizer, -) -from keras_hub.src.models.llama3.llama3_backbone import ( - Llama3Backbone as Llama3Backbone, -) -from keras_hub.src.models.llama3.llama3_causal_lm import ( - Llama3CausalLM as Llama3CausalLM, + LlamaCausalLMPreprocessor, ) +from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer +from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone +from keras_hub.src.models.llama3.llama3_causal_lm import Llama3CausalLM from keras_hub.src.models.llama3.llama3_causal_lm_preprocessor import ( - Llama3CausalLMPreprocessor as Llama3CausalLMPreprocessor, -) -from keras_hub.src.models.llama3.llama3_tokenizer import ( - Llama3Tokenizer as Llama3Tokenizer, -) -from keras_hub.src.models.masked_lm import MaskedLM as MaskedLM -from keras_hub.src.models.masked_lm_preprocessor import ( - MaskedLMPreprocessor as MaskedLMPreprocessor, -) -from keras_hub.src.models.mistral.mistral_backbone import ( - MistralBackbone as MistralBackbone, -) -from keras_hub.src.models.mistral.mistral_causal_lm import ( - MistralCausalLM as MistralCausalLM, + Llama3CausalLMPreprocessor, ) +from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer +from keras_hub.src.models.masked_lm import MaskedLM +from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor +from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone +from keras_hub.src.models.mistral.mistral_causal_lm import MistralCausalLM from keras_hub.src.models.mistral.mistral_causal_lm_preprocessor import ( - MistralCausalLMPreprocessor as MistralCausalLMPreprocessor, -) -from keras_hub.src.models.mistral.mistral_tokenizer import ( - MistralTokenizer as MistralTokenizer, -) -from keras_hub.src.models.mit.mit_backbone import MiTBackbone as MiTBackbone -from keras_hub.src.models.mit.mit_image_classifier import ( - MiTImageClassifier as MiTImageClassifier, + MistralCausalLMPreprocessor, ) +from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer +from keras_hub.src.models.mit.mit_backbone import MiTBackbone +from keras_hub.src.models.mit.mit_image_classifier import MiTImageClassifier from keras_hub.src.models.mit.mit_image_classifier_preprocessor import ( - MiTImageClassifierPreprocessor as MiTImageClassifierPreprocessor, -) -from keras_hub.src.models.mixtral.mixtral_backbone import ( - MixtralBackbone as MixtralBackbone, -) -from keras_hub.src.models.mixtral.mixtral_causal_lm import ( - MixtralCausalLM as MixtralCausalLM, + MiTImageClassifierPreprocessor, ) +from keras_hub.src.models.mixtral.mixtral_backbone import MixtralBackbone +from keras_hub.src.models.mixtral.mixtral_causal_lm import MixtralCausalLM from keras_hub.src.models.mixtral.mixtral_causal_lm_preprocessor import ( - MixtralCausalLMPreprocessor as MixtralCausalLMPreprocessor, -) -from keras_hub.src.models.mixtral.mixtral_tokenizer import ( - MixtralTokenizer as MixtralTokenizer, -) -from keras_hub.src.models.mobilenet.mobilenet_backbone import ( - MobileNetBackbone as MobileNetBackbone, + MixtralCausalLMPreprocessor, ) +from keras_hub.src.models.mixtral.mixtral_tokenizer import MixtralTokenizer +from 
keras_hub.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone from keras_hub.src.models.mobilenet.mobilenet_image_classifier import ( - MobileNetImageClassifier as MobileNetImageClassifier, + MobileNetImageClassifier, ) from keras_hub.src.models.mobilenet.mobilenet_image_classifier_preprocessor import ( - MobileNetImageClassifierPreprocessor as MobileNetImageClassifierPreprocessor, + MobileNetImageClassifierPreprocessor, ) from keras_hub.src.models.moonshine.moonshine_audio_to_text import ( - MoonshineAudioToText as MoonshineAudioToText, + MoonshineAudioToText, ) from keras_hub.src.models.moonshine.moonshine_audio_to_text_preprocessor import ( - MoonshineAudioToTextPreprocessor as MoonshineAudioToTextPreprocessor, -) -from keras_hub.src.models.moonshine.moonshine_backbone import ( - MoonshineBackbone as MoonshineBackbone, + MoonshineAudioToTextPreprocessor, ) +from keras_hub.src.models.moonshine.moonshine_backbone import MoonshineBackbone from keras_hub.src.models.moonshine.moonshine_tokenizer import ( - MoonshineTokenizer as MoonshineTokenizer, + MoonshineTokenizer, ) +from keras_hub.src.models.object_detector import ObjectDetector from keras_hub.src.models.object_detector import ( ObjectDetector as ImageObjectDetector, ) -from keras_hub.src.models.object_detector import ( - ObjectDetector as ObjectDetector, -) from keras_hub.src.models.object_detector_preprocessor import ( - ObjectDetectorPreprocessor as ImageObjectDetectorPreprocessor, + ObjectDetectorPreprocessor, ) from keras_hub.src.models.object_detector_preprocessor import ( - ObjectDetectorPreprocessor as ObjectDetectorPreprocessor, + ObjectDetectorPreprocessor as ImageObjectDetectorPreprocessor, ) -from keras_hub.src.models.opt.opt_backbone import OPTBackbone as OPTBackbone -from keras_hub.src.models.opt.opt_causal_lm import OPTCausalLM as OPTCausalLM +from keras_hub.src.models.opt.opt_backbone import OPTBackbone +from keras_hub.src.models.opt.opt_causal_lm import OPTCausalLM from keras_hub.src.models.opt.opt_causal_lm_preprocessor import ( - OPTCausalLMPreprocessor as OPTCausalLMPreprocessor, + OPTCausalLMPreprocessor, ) -from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer as OPTTokenizer +from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer from keras_hub.src.models.pali_gemma.pali_gemma_backbone import ( - PaliGemmaBackbone as PaliGemmaBackbone, + PaliGemmaBackbone, ) from keras_hub.src.models.pali_gemma.pali_gemma_causal_lm import ( - PaliGemmaCausalLM as PaliGemmaCausalLM, + PaliGemmaCausalLM, ) from keras_hub.src.models.pali_gemma.pali_gemma_causal_lm_preprocessor import ( - PaliGemmaCausalLMPreprocessor as PaliGemmaCausalLMPreprocessor, + PaliGemmaCausalLMPreprocessor, ) from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import ( - PaliGemmaTokenizer as PaliGemmaTokenizer, -) -from keras_hub.src.models.parseq.parseq_backbone import ( - PARSeqBackbone as PARSeqBackbone, -) -from keras_hub.src.models.parseq.parseq_causal_lm import ( - PARSeqCausalLM as PARSeqCausalLM, + PaliGemmaTokenizer, ) +from keras_hub.src.models.parseq.parseq_backbone import PARSeqBackbone +from keras_hub.src.models.parseq.parseq_causal_lm import PARSeqCausalLM from keras_hub.src.models.parseq.parseq_causal_lm_preprocessor import ( - PARSeqCausalLMPreprocessor as PARSeqCausalLMPreprocessor, -) -from keras_hub.src.models.parseq.parseq_tokenizer import ( - PARSeqTokenizer as PARSeqTokenizer, -) -from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone as Phi3Backbone -from 
keras_hub.src.models.phi3.phi3_causal_lm import ( - Phi3CausalLM as Phi3CausalLM, + PARSeqCausalLMPreprocessor, ) +from keras_hub.src.models.parseq.parseq_tokenizer import PARSeqTokenizer +from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone +from keras_hub.src.models.phi3.phi3_causal_lm import Phi3CausalLM from keras_hub.src.models.phi3.phi3_causal_lm_preprocessor import ( - Phi3CausalLMPreprocessor as Phi3CausalLMPreprocessor, + Phi3CausalLMPreprocessor, ) -from keras_hub.src.models.phi3.phi3_tokenizer import ( - Phi3Tokenizer as Phi3Tokenizer, -) -from keras_hub.src.models.preprocessor import Preprocessor as Preprocessor +from keras_hub.src.models.phi3.phi3_tokenizer import Phi3Tokenizer +from keras_hub.src.models.preprocessor import Preprocessor +from keras_hub.src.models.qwen.qwen_backbone import QwenBackbone from keras_hub.src.models.qwen.qwen_backbone import ( QwenBackbone as Qwen2Backbone, ) -from keras_hub.src.models.qwen.qwen_backbone import QwenBackbone as QwenBackbone +from keras_hub.src.models.qwen.qwen_causal_lm import QwenCausalLM from keras_hub.src.models.qwen.qwen_causal_lm import ( QwenCausalLM as Qwen2CausalLM, ) -from keras_hub.src.models.qwen.qwen_causal_lm import ( - QwenCausalLM as QwenCausalLM, -) from keras_hub.src.models.qwen.qwen_causal_lm_preprocessor import ( - QwenCausalLMPreprocessor as Qwen2CausalLMPreprocessor, + QwenCausalLMPreprocessor, ) from keras_hub.src.models.qwen.qwen_causal_lm_preprocessor import ( - QwenCausalLMPreprocessor as QwenCausalLMPreprocessor, + QwenCausalLMPreprocessor as Qwen2CausalLMPreprocessor, ) +from keras_hub.src.models.qwen.qwen_tokenizer import QwenTokenizer from keras_hub.src.models.qwen.qwen_tokenizer import ( QwenTokenizer as Qwen2Tokenizer, ) -from keras_hub.src.models.qwen.qwen_tokenizer import ( - QwenTokenizer as QwenTokenizer, -) -from keras_hub.src.models.qwen3.qwen3_backbone import ( - Qwen3Backbone as Qwen3Backbone, -) -from keras_hub.src.models.qwen3.qwen3_causal_lm import ( - Qwen3CausalLM as Qwen3CausalLM, -) +from keras_hub.src.models.qwen3.qwen3_backbone import Qwen3Backbone +from keras_hub.src.models.qwen3.qwen3_causal_lm import Qwen3CausalLM from keras_hub.src.models.qwen3.qwen3_causal_lm_preprocessor import ( - Qwen3CausalLMPreprocessor as Qwen3CausalLMPreprocessor, -) -from keras_hub.src.models.qwen3.qwen3_tokenizer import ( - Qwen3Tokenizer as Qwen3Tokenizer, -) -from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import ( - Qwen3MoeBackbone as Qwen3MoeBackbone, -) -from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm import ( - Qwen3MoeCausalLM as Qwen3MoeCausalLM, + Qwen3CausalLMPreprocessor, ) +from keras_hub.src.models.qwen3.qwen3_tokenizer import Qwen3Tokenizer +from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import Qwen3MoeBackbone +from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm import Qwen3MoeCausalLM from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm_preprocessor import ( - Qwen3MoeCausalLMPreprocessor as Qwen3MoeCausalLMPreprocessor, -) -from keras_hub.src.models.qwen_moe.qwen_moe_backbone import ( - QwenMoeBackbone as QwenMoeBackbone, -) -from keras_hub.src.models.qwen_moe.qwen_moe_causal_lm import ( - QwenMoeCausalLM as QwenMoeCausalLM, + Qwen3MoeCausalLMPreprocessor, ) +from keras_hub.src.models.qwen_moe.qwen_moe_backbone import QwenMoeBackbone +from keras_hub.src.models.qwen_moe.qwen_moe_causal_lm import QwenMoeCausalLM from keras_hub.src.models.qwen_moe.qwen_moe_causal_lm_preprocessor import ( - QwenMoeCausalLMPreprocessor as QwenMoeCausalLMPreprocessor, 
-) -from keras_hub.src.models.resnet.resnet_backbone import ( - ResNetBackbone as ResNetBackbone, + QwenMoeCausalLMPreprocessor, ) +from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone from keras_hub.src.models.resnet.resnet_image_classifier import ( - ResNetImageClassifier as ResNetImageClassifier, + ResNetImageClassifier, ) from keras_hub.src.models.resnet.resnet_image_classifier_preprocessor import ( - ResNetImageClassifierPreprocessor as ResNetImageClassifierPreprocessor, -) -from keras_hub.src.models.retinanet.retinanet_backbone import ( - RetinaNetBackbone as RetinaNetBackbone, + ResNetImageClassifierPreprocessor, ) +from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone from keras_hub.src.models.retinanet.retinanet_object_detector import ( - RetinaNetObjectDetector as RetinaNetObjectDetector, + RetinaNetObjectDetector, ) from keras_hub.src.models.retinanet.retinanet_object_detector_preprocessor import ( - RetinaNetObjectDetectorPreprocessor as RetinaNetObjectDetectorPreprocessor, -) -from keras_hub.src.models.roberta.roberta_backbone import ( - RobertaBackbone as RobertaBackbone, -) -from keras_hub.src.models.roberta.roberta_masked_lm import ( - RobertaMaskedLM as RobertaMaskedLM, + RetinaNetObjectDetectorPreprocessor, ) +from keras_hub.src.models.roberta.roberta_backbone import RobertaBackbone +from keras_hub.src.models.roberta.roberta_masked_lm import RobertaMaskedLM from keras_hub.src.models.roberta.roberta_masked_lm_preprocessor import ( - RobertaMaskedLMPreprocessor as RobertaMaskedLMPreprocessor, + RobertaMaskedLMPreprocessor, ) from keras_hub.src.models.roberta.roberta_text_classifier import ( - RobertaTextClassifier as RobertaClassifier, + RobertaTextClassifier, ) from keras_hub.src.models.roberta.roberta_text_classifier import ( - RobertaTextClassifier as RobertaTextClassifier, + RobertaTextClassifier as RobertaClassifier, ) from keras_hub.src.models.roberta.roberta_text_classifier_preprocessor import ( - RobertaTextClassifierPreprocessor as RobertaPreprocessor, + RobertaTextClassifierPreprocessor, ) from keras_hub.src.models.roberta.roberta_text_classifier_preprocessor import ( - RobertaTextClassifierPreprocessor as RobertaTextClassifierPreprocessor, -) -from keras_hub.src.models.roberta.roberta_tokenizer import ( - RobertaTokenizer as RobertaTokenizer, + RobertaTextClassifierPreprocessor as RobertaPreprocessor, ) +from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer from keras_hub.src.models.roformer_v2.roformer_v2_backbone import ( - RoformerV2Backbone as RoformerV2Backbone, + RoformerV2Backbone, ) from keras_hub.src.models.roformer_v2.roformer_v2_masked_lm import ( - RoformerV2MaskedLM as RoformerV2MaskedLM, + RoformerV2MaskedLM, ) from keras_hub.src.models.roformer_v2.roformer_v2_masked_lm_preprocessor import ( - RoformerV2MaskedLMPreprocessor as RoformerV2MaskedLMPreprocessor, + RoformerV2MaskedLMPreprocessor, ) from keras_hub.src.models.roformer_v2.roformer_v2_text_classifier import ( - RoformerV2TextClassifier as RoformerV2TextClassifier, + RoformerV2TextClassifier, ) from keras_hub.src.models.roformer_v2.roformer_v2_text_classifier_preprocessor import ( - RoformerV2TextClassifierPreprocessor as RoformerV2TextClassifierPreprocessor, + RoformerV2TextClassifierPreprocessor, ) from keras_hub.src.models.roformer_v2.roformer_v2_tokenizer import ( - RoformerV2Tokenizer as RoformerV2Tokenizer, + RoformerV2Tokenizer, ) -from keras_hub.src.models.sam.sam_backbone import SAMBackbone as SAMBackbone -from 
-from keras_hub.src.models.sam.sam_image_segmenter import (
-    SAMImageSegmenter as SAMImageSegmenter,
+from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone
+from keras_hub.src.models.rwkv7.rwkv7_causal_lm import RWKV7CausalLM
+from keras_hub.src.models.rwkv7.rwkv7_causal_lm_preprocessor import (
+    RWKV7CausalLMPreprocessor,
 )
+from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer
+from keras_hub.src.models.sam.sam_backbone import SAMBackbone
+from keras_hub.src.models.sam.sam_image_segmenter import SAMImageSegmenter
 from keras_hub.src.models.sam.sam_image_segmenter_preprocessor import (
-    SAMImageSegmenterPreprocessor as SAMImageSegmenterPreprocessor,
-)
-from keras_hub.src.models.segformer.segformer_backbone import (
-    SegFormerBackbone as SegFormerBackbone,
+    SAMImageSegmenterPreprocessor,
 )
+from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone
 from keras_hub.src.models.segformer.segformer_image_segmenter import (
-    SegFormerImageSegmenter as SegFormerImageSegmenter,
+    SegFormerImageSegmenter,
 )
 from keras_hub.src.models.segformer.segformer_image_segmenter_preprocessor import (
-    SegFormerImageSegmenterPreprocessor as SegFormerImageSegmenterPreprocessor,
-)
-from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM as Seq2SeqLM
-from keras_hub.src.models.seq_2_seq_lm_preprocessor import (
-    Seq2SeqLMPreprocessor as Seq2SeqLMPreprocessor,
-)
-from keras_hub.src.models.siglip.siglip_backbone import (
-    SigLIPBackbone as SigLIPBackbone,
-)
-from keras_hub.src.models.siglip.siglip_preprocessor import (
-    SigLIPPreprocessor as SigLIPPreprocessor,
-)
-from keras_hub.src.models.siglip.siglip_text_encoder import (
-    SigLIPTextEncoder as SigLIPTextEncoder,
-)
-from keras_hub.src.models.siglip.siglip_tokenizer import (
-    SigLIPTokenizer as SigLIPTokenizer,
-)
+    SegFormerImageSegmenterPreprocessor,
+)
+from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM
+from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor
+from keras_hub.src.models.siglip.siglip_backbone import SigLIPBackbone
+from keras_hub.src.models.siglip.siglip_preprocessor import SigLIPPreprocessor
+from keras_hub.src.models.siglip.siglip_text_encoder import SigLIPTextEncoder
+from keras_hub.src.models.siglip.siglip_tokenizer import SigLIPTokenizer
 from keras_hub.src.models.siglip.siglip_vision_encoder import (
-    SigLIPVisionEncoder as SigLIPVisionEncoder,
+    SigLIPVisionEncoder,
 )
 from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import (
-    StableDiffusion3Backbone as StableDiffusion3Backbone,
+    StableDiffusion3Backbone,
 )
 from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_image_to_image import (
-    StableDiffusion3ImageToImage as StableDiffusion3ImageToImage,
+    StableDiffusion3ImageToImage,
 )
 from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_inpaint import (
-    StableDiffusion3Inpaint as StableDiffusion3Inpaint,
+    StableDiffusion3Inpaint,
 )
 from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_text_to_image import (
-    StableDiffusion3TextToImage as StableDiffusion3TextToImage,
+    StableDiffusion3TextToImage,
 )
 from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_text_to_image_preprocessor import (
-    StableDiffusion3TextToImagePreprocessor as StableDiffusion3TextToImagePreprocessor,
-)
-from keras_hub.src.models.t5.t5_backbone import T5Backbone as T5Backbone
-from keras_hub.src.models.t5.t5_preprocessor import (
-    T5Preprocessor as T5Preprocessor,
-)
-from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer as T5Tokenizer
-from keras_hub.src.models.t5gemma.t5gemma_backbone import (
-    T5GemmaBackbone as T5GemmaBackbone,
-)
-from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm import (
-    T5GemmaSeq2SeqLM as T5GemmaSeq2SeqLM,
+    StableDiffusion3TextToImagePreprocessor,
 )
+from keras_hub.src.models.t5.t5_backbone import T5Backbone
+from keras_hub.src.models.t5.t5_preprocessor import T5Preprocessor
+from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer
+from keras_hub.src.models.t5gemma.t5gemma_backbone import T5GemmaBackbone
+from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm import T5GemmaSeq2SeqLM
 from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm_preprocessor import (
-    T5GemmaSeq2SeqLMPreprocessor as T5GemmaSeq2SeqLMPreprocessor,
-)
-from keras_hub.src.models.t5gemma.t5gemma_tokenizer import (
-    T5GemmaTokenizer as T5GemmaTokenizer,
+    T5GemmaSeq2SeqLMPreprocessor,
 )
-from keras_hub.src.models.task import Task as Task
+from keras_hub.src.models.t5gemma.t5gemma_tokenizer import T5GemmaTokenizer
+from keras_hub.src.models.task import Task
+from keras_hub.src.models.text_classifier import TextClassifier
 from keras_hub.src.models.text_classifier import TextClassifier as Classifier
-from keras_hub.src.models.text_classifier import (
-    TextClassifier as TextClassifier,
-)
 from keras_hub.src.models.text_classifier_preprocessor import (
-    TextClassifierPreprocessor as TextClassifierPreprocessor,
+    TextClassifierPreprocessor,
 )
-from keras_hub.src.models.text_to_image import TextToImage as TextToImage
+from keras_hub.src.models.text_to_image import TextToImage
 from keras_hub.src.models.text_to_image_preprocessor import (
-    TextToImagePreprocessor as TextToImagePreprocessor,
-)
-from keras_hub.src.models.vgg.vgg_backbone import VGGBackbone as VGGBackbone
-from keras_hub.src.models.vgg.vgg_image_classifier import (
-    VGGImageClassifier as VGGImageClassifier,
+    TextToImagePreprocessor,
 )
+from keras_hub.src.models.vgg.vgg_backbone import VGGBackbone
+from keras_hub.src.models.vgg.vgg_image_classifier import VGGImageClassifier
 from keras_hub.src.models.vgg.vgg_image_classifier_preprocessor import (
-    VGGImageClassifierPreprocessor as VGGImageClassifierPreprocessor,
-)
-from keras_hub.src.models.vit.vit_backbone import ViTBackbone as ViTBackbone
-from keras_hub.src.models.vit.vit_image_classifier import (
-    ViTImageClassifier as ViTImageClassifier,
+    VGGImageClassifierPreprocessor,
 )
+from keras_hub.src.models.vit.vit_backbone import ViTBackbone
+from keras_hub.src.models.vit.vit_image_classifier import ViTImageClassifier
 from keras_hub.src.models.vit.vit_image_classifier_preprocessor import (
-    ViTImageClassifierPreprocessor as ViTImageClassifierPreprocessor,
-)
-from keras_hub.src.models.vit_det.vit_det_backbone import (
-    ViTDetBackbone as ViTDetBackbone,
-)
-from keras_hub.src.models.whisper.whisper_backbone import (
-    WhisperBackbone as WhisperBackbone,
-)
-from keras_hub.src.models.whisper.whisper_tokenizer import (
-    WhisperTokenizer as WhisperTokenizer,
-)
-from keras_hub.src.models.xception.xception_backbone import (
-    XceptionBackbone as XceptionBackbone,
+    ViTImageClassifierPreprocessor,
 )
+from keras_hub.src.models.vit_det.vit_det_backbone import ViTDetBackbone
+from keras_hub.src.models.whisper.whisper_backbone import WhisperBackbone
+from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer
+from keras_hub.src.models.xception.xception_backbone import XceptionBackbone
 from keras_hub.src.models.xception.xception_image_classifier import (
-    XceptionImageClassifier as XceptionImageClassifier,
+    XceptionImageClassifier,
 )
 from keras_hub.src.models.xception.xception_image_classifier_preprocessor import (
-    XceptionImageClassifierPreprocessor as XceptionImageClassifierPreprocessor,
+    XceptionImageClassifierPreprocessor,
 )
 from keras_hub.src.models.xlm_roberta.xlm_roberta_backbone import (
-    XLMRobertaBackbone as XLMRobertaBackbone,
+    XLMRobertaBackbone,
 )
 from keras_hub.src.models.xlm_roberta.xlm_roberta_masked_lm import (
-    XLMRobertaMaskedLM as XLMRobertaMaskedLM,
+    XLMRobertaMaskedLM,
 )
 from keras_hub.src.models.xlm_roberta.xlm_roberta_masked_lm_preprocessor import (
-    XLMRobertaMaskedLMPreprocessor as XLMRobertaMaskedLMPreprocessor,
+    XLMRobertaMaskedLMPreprocessor,
 )
 from keras_hub.src.models.xlm_roberta.xlm_roberta_text_classifier import (
-    XLMRobertaTextClassifier as XLMRobertaClassifier,
+    XLMRobertaTextClassifier,
 )
 from keras_hub.src.models.xlm_roberta.xlm_roberta_text_classifier import (
-    XLMRobertaTextClassifier as XLMRobertaTextClassifier,
+    XLMRobertaTextClassifier as XLMRobertaClassifier,
 )
 from keras_hub.src.models.xlm_roberta.xlm_roberta_text_classifier_preprocessor import (
-    XLMRobertaTextClassifierPreprocessor as XLMRobertaPreprocessor,
+    XLMRobertaTextClassifierPreprocessor,
 )
 from keras_hub.src.models.xlm_roberta.xlm_roberta_text_classifier_preprocessor import (
-    XLMRobertaTextClassifierPreprocessor as XLMRobertaTextClassifierPreprocessor,
+    XLMRobertaTextClassifierPreprocessor as XLMRobertaPreprocessor,
 )
 from keras_hub.src.models.xlm_roberta.xlm_roberta_tokenizer import (
-    XLMRobertaTokenizer as XLMRobertaTokenizer,
-)
-from keras_hub.src.models.xlnet.xlnet_backbone import (
-    XLNetBackbone as XLNetBackbone,
+    XLMRobertaTokenizer,
 )
-from keras_hub.src.tokenizers.tokenizer import Tokenizer as Tokenizer
+from keras_hub.src.models.xlnet.xlnet_backbone import XLNetBackbone
+from keras_hub.src.tokenizers.tokenizer import Tokenizer
diff --git a/keras_hub/api/samplers/__init__.py b/keras_hub/api/samplers/__init__.py
index 29bfef00fc..9feb76c669 100644
--- a/keras_hub/api/samplers/__init__.py
+++ b/keras_hub/api/samplers/__init__.py
@@ -4,15 +4,13 @@
 since your modifications would be overwritten.
 """
 
-from keras_hub.src.samplers.beam_sampler import BeamSampler as BeamSampler
-from keras_hub.src.samplers.contrastive_sampler import (
-    ContrastiveSampler as ContrastiveSampler,
-)
-from keras_hub.src.samplers.greedy_sampler import GreedySampler as GreedySampler
-from keras_hub.src.samplers.random_sampler import RandomSampler as RandomSampler
-from keras_hub.src.samplers.sampler import Sampler as Sampler
-from keras_hub.src.samplers.serialization import deserialize as deserialize
-from keras_hub.src.samplers.serialization import get as get
-from keras_hub.src.samplers.serialization import serialize as serialize
-from keras_hub.src.samplers.top_k_sampler import TopKSampler as TopKSampler
-from keras_hub.src.samplers.top_p_sampler import TopPSampler as TopPSampler
+from keras_hub.src.samplers.beam_sampler import BeamSampler
+from keras_hub.src.samplers.contrastive_sampler import ContrastiveSampler
+from keras_hub.src.samplers.greedy_sampler import GreedySampler
+from keras_hub.src.samplers.random_sampler import RandomSampler
+from keras_hub.src.samplers.sampler import Sampler
+from keras_hub.src.samplers.serialization import deserialize
+from keras_hub.src.samplers.serialization import get
+from keras_hub.src.samplers.serialization import serialize
+from keras_hub.src.samplers.top_k_sampler import TopKSampler
+from keras_hub.src.samplers.top_p_sampler import TopPSampler
diff --git a/keras_hub/api/tokenizers/__init__.py b/keras_hub/api/tokenizers/__init__.py
index b155d0e6e1..b13023ef3e 100644
--- a/keras_hub/api/tokenizers/__init__.py
+++ b/keras_hub/api/tokenizers/__init__.py
@@ -4,124 +4,69 @@
 since your modifications would be overwritten.
 """
 
-from keras_hub.src.models.albert.albert_tokenizer import (
-    AlbertTokenizer as AlbertTokenizer,
-)
-from keras_hub.src.models.bart.bart_tokenizer import (
-    BartTokenizer as BartTokenizer,
-)
-from keras_hub.src.models.bert.bert_tokenizer import (
-    BertTokenizer as BertTokenizer,
-)
-from keras_hub.src.models.bloom.bloom_tokenizer import (
-    BloomTokenizer as BloomTokenizer,
-)
-from keras_hub.src.models.clip.clip_tokenizer import (
-    CLIPTokenizer as CLIPTokenizer,
-)
+from keras_hub.src.models.albert.albert_tokenizer import AlbertTokenizer
+from keras_hub.src.models.bart.bart_tokenizer import BartTokenizer
+from keras_hub.src.models.bert.bert_tokenizer import BertTokenizer
+from keras_hub.src.models.bloom.bloom_tokenizer import BloomTokenizer
+from keras_hub.src.models.clip.clip_tokenizer import CLIPTokenizer
 from keras_hub.src.models.deberta_v3.deberta_v3_tokenizer import (
-    DebertaV3Tokenizer as DebertaV3Tokenizer,
+    DebertaV3Tokenizer,
 )
 from keras_hub.src.models.distil_bert.distil_bert_tokenizer import (
-    DistilBertTokenizer as DistilBertTokenizer,
-)
-from keras_hub.src.models.electra.electra_tokenizer import (
-    ElectraTokenizer as ElectraTokenizer,
-)
-from keras_hub.src.models.esm.esm_tokenizer import ESMTokenizer as ESMTokenizer
-from keras_hub.src.models.f_net.f_net_tokenizer import (
-    FNetTokenizer as FNetTokenizer,
-)
-from keras_hub.src.models.falcon.falcon_tokenizer import (
-    FalconTokenizer as FalconTokenizer,
-)
-from keras_hub.src.models.gemma.gemma_tokenizer import (
-    GemmaTokenizer as GemmaTokenizer,
-)
-from keras_hub.src.models.gemma3.gemma3_tokenizer import (
-    Gemma3Tokenizer as Gemma3Tokenizer,
-)
-from keras_hub.src.models.gpt2.gpt2_tokenizer import (
-    GPT2Tokenizer as GPT2Tokenizer,
-)
-from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import (
-    GPTNeoXTokenizer as GPTNeoXTokenizer,
-)
-from keras_hub.src.models.llama.llama_tokenizer import (
-    LlamaTokenizer as LlamaTokenizer,
-)
-from keras_hub.src.models.llama3.llama3_tokenizer import (
-    Llama3Tokenizer as Llama3Tokenizer,
-)
-from keras_hub.src.models.mistral.mistral_tokenizer import (
-    MistralTokenizer as MistralTokenizer,
-)
-from keras_hub.src.models.mixtral.mixtral_tokenizer import (
-    MixtralTokenizer as MixtralTokenizer,
-)
+    DistilBertTokenizer,
+)
+from keras_hub.src.models.electra.electra_tokenizer import ElectraTokenizer
+from keras_hub.src.models.esm.esm_tokenizer import ESMTokenizer
+from keras_hub.src.models.f_net.f_net_tokenizer import FNetTokenizer
+from keras_hub.src.models.falcon.falcon_tokenizer import FalconTokenizer
+from keras_hub.src.models.gemma.gemma_tokenizer import GemmaTokenizer
+from keras_hub.src.models.gemma3.gemma3_tokenizer import Gemma3Tokenizer
+from keras_hub.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer
+from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer
+from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer
+from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
+from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer
+from keras_hub.src.models.mixtral.mixtral_tokenizer import MixtralTokenizer
 from keras_hub.src.models.moonshine.moonshine_tokenizer import (
-    MoonshineTokenizer as MoonshineTokenizer,
+    MoonshineTokenizer,
 )
-from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer as OPTTokenizer
+from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer
 from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
-    PaliGemmaTokenizer as PaliGemmaTokenizer,
-)
-from keras_hub.src.models.parseq.parseq_tokenizer import (
-    PARSeqTokenizer as PARSeqTokenizer,
-)
-from keras_hub.src.models.phi3.phi3_tokenizer import (
-    Phi3Tokenizer as Phi3Tokenizer,
+    PaliGemmaTokenizer,
 )
+from keras_hub.src.models.parseq.parseq_tokenizer import PARSeqTokenizer
+from keras_hub.src.models.phi3.phi3_tokenizer import Phi3Tokenizer
+from keras_hub.src.models.qwen.qwen_tokenizer import QwenTokenizer
 from keras_hub.src.models.qwen.qwen_tokenizer import (
     QwenTokenizer as Qwen2Tokenizer,
 )
-from keras_hub.src.models.qwen.qwen_tokenizer import (
-    QwenTokenizer as QwenTokenizer,
-)
-from keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer import (
-    Qwen3MoeTokenizer as Qwen3MoeTokenizer,
-)
-from keras_hub.src.models.qwen_moe.qwen_moe_tokenizer import (
-    QwenMoeTokenizer as QwenMoeTokenizer,
-)
-from keras_hub.src.models.roberta.roberta_tokenizer import (
-    RobertaTokenizer as RobertaTokenizer,
-)
+from keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer import Qwen3MoeTokenizer
+from keras_hub.src.models.qwen_moe.qwen_moe_tokenizer import QwenMoeTokenizer
+from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer
 from keras_hub.src.models.roformer_v2.roformer_v2_tokenizer import (
-    RoformerV2Tokenizer as RoformerV2Tokenizer,
-)
-from keras_hub.src.models.siglip.siglip_tokenizer import (
-    SigLIPTokenizer as SigLIPTokenizer,
-)
-from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer as T5Tokenizer
-from keras_hub.src.models.t5gemma.t5gemma_tokenizer import (
-    T5GemmaTokenizer as T5GemmaTokenizer,
-)
-from keras_hub.src.models.whisper.whisper_tokenizer import (
-    WhisperTokenizer as WhisperTokenizer,
+    RoformerV2Tokenizer,
 )
+from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer
+from keras_hub.src.models.siglip.siglip_tokenizer import SigLIPTokenizer
+from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer
+from keras_hub.src.models.t5gemma.t5gemma_tokenizer import T5GemmaTokenizer
+from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer
 from keras_hub.src.models.xlm_roberta.xlm_roberta_tokenizer import (
-    XLMRobertaTokenizer as XLMRobertaTokenizer,
-)
-from keras_hub.src.tokenizers.byte_pair_tokenizer import (
-    BytePairTokenizer as BytePairTokenizer,
-)
-from keras_hub.src.tokenizers.byte_tokenizer import (
-    ByteTokenizer as ByteTokenizer,
+    XLMRobertaTokenizer,
 )
+from keras_hub.src.tokenizers.byte_pair_tokenizer import BytePairTokenizer
+from keras_hub.src.tokenizers.byte_tokenizer import ByteTokenizer
 from keras_hub.src.tokenizers.sentence_piece_tokenizer import (
-    SentencePieceTokenizer as SentencePieceTokenizer,
+    SentencePieceTokenizer,
 )
 from keras_hub.src.tokenizers.sentence_piece_tokenizer_trainer import (
-    compute_sentence_piece_proto as compute_sentence_piece_proto,
+    compute_sentence_piece_proto,
 )
-from keras_hub.src.tokenizers.tokenizer import Tokenizer as Tokenizer
+from keras_hub.src.tokenizers.tokenizer import Tokenizer
 from keras_hub.src.tokenizers.unicode_codepoint_tokenizer import (
-    UnicodeCodepointTokenizer as UnicodeCodepointTokenizer,
-)
-from keras_hub.src.tokenizers.word_piece_tokenizer import (
-    WordPieceTokenizer as WordPieceTokenizer,
+    UnicodeCodepointTokenizer,
 )
+from keras_hub.src.tokenizers.word_piece_tokenizer import WordPieceTokenizer
 from keras_hub.src.tokenizers.word_piece_tokenizer_trainer import (
-    compute_word_piece_vocabulary as compute_word_piece_vocabulary,
+    compute_word_piece_vocabulary,
 )
diff --git a/keras_hub/api/utils/__init__.py b/keras_hub/api/utils/__init__.py
index 0bd8cb642e..8ce47790b0 100644
--- a/keras_hub/api/utils/__init__.py
+++ b/keras_hub/api/utils/__init__.py
@@ -4,18 +4,10 @@
 since your modifications would be overwritten.
""" -from keras_hub.src.utils.coco.coco_utils import ( - coco_id_to_name as coco_id_to_name, -) -from keras_hub.src.utils.coco.coco_utils import ( - coco_name_to_id as coco_name_to_id, -) -from keras_hub.src.utils.imagenet.imagenet_utils import ( - decode_imagenet_predictions as decode_imagenet_predictions, -) -from keras_hub.src.utils.imagenet.imagenet_utils import ( - imagenet_id_to_name as imagenet_id_to_name, -) +from keras_hub.src.utils.coco.coco_utils import coco_id_to_name +from keras_hub.src.utils.coco.coco_utils import coco_name_to_id from keras_hub.src.utils.imagenet.imagenet_utils import ( - imagenet_name_to_id as imagenet_name_to_id, + decode_imagenet_predictions, ) +from keras_hub.src.utils.imagenet.imagenet_utils import imagenet_id_to_name +from keras_hub.src.utils.imagenet.imagenet_utils import imagenet_name_to_id diff --git a/keras_hub/src/models/rwkv7/rwkv7_backbone.py b/keras_hub/src/models/rwkv7/rwkv7_backbone.py new file mode 100644 index 0000000000..d6d3d9a36b --- /dev/null +++ b/keras_hub/src/models/rwkv7/rwkv7_backbone.py @@ -0,0 +1,119 @@ +import keras +from keras import ops + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.backbone import Backbone +from keras_hub.src.models.rwkv7.rwkv7_layer import RWKV7_Block + + +def rwkv7_kernel_initializer(stddev=0.02): + return keras.initializers.TruncatedNormal(stddev=stddev) + + +@keras_hub_export("keras_hub.models.RWKV7Backbone") +class RWKV7Backbone(Backbone): + def __init__( + self, + hidden_size, + head_size, + num_layers, + vocabulary_size, + intermediate_dim, + gate_lora=128, + mv_lora=32, + aaa_lora=64, + decay_lora=64, + dtype=None, + dropout_rate=0, + **kwargs, + ): + # === Layers === + self.token_embedding = keras.layers.Embedding( + input_dim=vocabulary_size, + output_dim=hidden_size, + embeddings_initializer=rwkv7_kernel_initializer(), + dtype=dtype, + name="token_embedding", + ) + self.token_embedding.build([None, None]) + + self.output_layer_norm = keras.layers.LayerNormalization( + epsilon=1e-5, name="output_norm" + ) + self.output_layer_norm.build([None, None, hidden_size]) + self.dropout = keras.layers.Dropout( + dropout_rate, + dtype=dtype, + name="dropout", + ) + self.rwkv_layers = [] + for i in range(num_layers): + layer = RWKV7_Block( + hidden_size, + head_size, + intermediate_dim, + gate_lora, + mv_lora, + aaa_lora, + decay_lora, + use_initial_norm=i == 0, + kernel_initializer=rwkv7_kernel_initializer(), + dtype=dtype, + name=f"rwkv_layer_{i}", + ) + + self.rwkv_layers.append(layer) + self.head = keras.layers.Dense( + units=vocabulary_size, + kernel_initializer=rwkv7_kernel_initializer(), + use_bias=False, + name="head", + ) + # === Functional Model === + token_id_input = keras.Input( + shape=(None,), dtype="int32", name="token_ids" + ) + + padding_mask = ops.not_equal(token_id_input, 0) + + x = self.token_embedding(token_id_input) + padding_mask = ops.cast(padding_mask, dtype=x.dtype) + v_first = None + for rwkv_layer in self.rwkv_layers: + x, v_first = rwkv_layer(x, v_first, padding_mask) + x = self.dropout(x) + sequence_output = self.output_layer_norm(x) + sequence_output = self.head(sequence_output) + super().__init__( + inputs=token_id_input, + outputs=sequence_output, + dtype=dtype, + **kwargs, + ) + + self.num_layers = num_layers + self.head_size = head_size + self.hidden_size = hidden_size + self.gate_lora = gate_lora + self.mv_lora = mv_lora + self.aaa_lora = aaa_lora + self.decay_lora = decay_lora + self.vocabulary_size = vocabulary_size + self.dropout_rate = 
dropout_rate + self.intermediate_dim = intermediate_dim + + def get_config(self): + config = { + "hidden_size": self.hidden_size, + "head_size": self.head_size, + "gate_lora": self.gate_lora, + "mv_lora": self.mv_lora, + "aaa_lora": self.aaa_lora, + "decay_lora": self.decay_lora, + "vocabulary_size": self.vocabulary_size, + "dropout_rate": self.dropout_rate, + "intermediate_dim": self.intermediate_dim, + "num_layers": self.num_layers, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_hub/src/models/rwkv7/rwkv7_casual_lm.py b/keras_hub/src/models/rwkv7/rwkv7_casual_lm.py new file mode 100644 index 0000000000..c78154129b --- /dev/null +++ b/keras_hub/src/models/rwkv7/rwkv7_casual_lm.py @@ -0,0 +1,50 @@ +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.causal_lm import CausalLM +from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone +from keras_hub.src.models.rwkv7.rwkv7_causal_lm_preprocessor import ( + RWKV7CausalLMPreprocessor, +) + + +@keras_hub_export("keras_hub.models.RWKV7CausalLM") +class RWKV7CausalLM(CausalLM): + backbone_cls = RWKV7Backbone + preprocessor_cls = RWKV7CausalLMPreprocessor + + def __init__(self, backbone, preprocessor=None, **kwargs): + # === Layers === + self.backbone = backbone + self.preprocessor = preprocessor + super().__init__( + inputs=backbone.inputs, + outputs=backbone.outputs, + **kwargs, + ) + + def call_with_cache( + self, + token_ids, + cache, + cache_update_index, + ): + pass # TODO + + def _build_cache(self, token_ids): + pass # TODO + + def generate_step( + self, + inputs, + stop_token_ids=None, + ): + pass # TODO + + def score( + self, + token_ids, + padding_mask=None, + scoring_mode="logits", + layer_intercept_fn=None, + target_ids=None, + ): + pass # TODO diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py new file mode 100644 index 0000000000..9a8a88211c --- /dev/null +++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py @@ -0,0 +1,88 @@ +import keras + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor +from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone +from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer +from keras_hub.src.utils.tensor_utils import strip_to_ragged + + +@keras_hub_export("keras_hub.models.RWKV7CausalLMPreprocessor") +class RWKV7CausalLMPreprocessor(CausalLMPreprocessor): + backbone_cls = RWKV7Backbone + tokenizer_cls = RWKVTokenizer + + def __init__( + self, + tokenizer, + add_start_token=False, + **kwargs, + ): + super().__init__( + tokenizer=tokenizer, add_start_token=add_start_token, **kwargs + ) + + def call( + self, + x, + y=None, + sample_weight=None, + sequence_length=None, + ): + sequence_length = sequence_length or self.sequence_length + x = self.tokenizer(x) + # Pad with one extra token to account for the truncation below. + token_ids, padding_mask = self.packer( + x, + sequence_length=sequence_length + 1, + add_start_value=self.add_start_token, + add_end_value=self.add_end_token, + ) + # The last token does not have a next token, so we truncate it out. + x = token_ids[..., :-1] + # Target `y` will be the next token. 
+        y, sample_weight = token_ids[..., 1:], padding_mask[..., 1:]
+        return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
+
+    def generate_preprocess(
+        self,
+        x,
+        sequence_length=None,
+    ):
+        """Convert strings to integer token input for generation.
+
+        Similar to calling the layer for training, this method takes in strings
+        or tensor strings, tokenizes and packs the input, and computes a padding
+        mask masking all inputs not filled in with a padded value.
+
+        Unlike calling the layer for training, this method does not compute
+        labels and will never append a `tokenizer.end_token_id` to the end of
+        the sequence (as generation is expected to continue at the end of the
+        inputted prompt).
+        """
+        if not self.built:
+            self.build(None)
+
+        x = self.tokenizer(x)
+        token_ids, padding_mask = self.packer(
+            x, sequence_length=sequence_length, add_end_value=False
+        )
+        return token_ids
+
+    def generate_postprocess(
+        self,
+        x,
+    ):
+        """Convert integer token output to strings for generation.
+
+        This method reverses `generate_preprocess()`, by first removing all
+        padding and start/end tokens, and then converting the integer sequence
+        back to a string.
+        """
+        if not self.built:
+            self.build(None)
+
+        token_ids, padding_mask = x["token_ids"], x["padding_mask"]
+        ids_to_strip = self.tokenizer.special_token_ids
+        token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip)
+        return self.tokenizer.detokenize(token_ids)
diff --git a/keras_hub/src/models/rwkv7/rwkv7_layer.py b/keras_hub/src/models/rwkv7/rwkv7_layer.py
new file mode 100644
index 0000000000..217f42ee32
--- /dev/null
+++ b/keras_hub/src/models/rwkv7/rwkv7_layer.py
@@ -0,0 +1,612 @@
+import warnings
+
+import keras
+from keras import initializers
+from keras import ops
+from keras.layers import Layer
+
+
+def transpose_head(x, head_first):
+    """
+    Transpose the input tensor.
+
+    Parameters:
+        x: Input tensor.
+        head_first: Boolean flag indicating whether to transpose.
+
+    Returns:
+        Transposed tensor if head_first is True, otherwise the original
+        tensor.
+    """
+    x = ops.cast(x, "float32")
+    if head_first:
+        return ops.transpose(x, (0, 2, 1, 3))
+    else:
+        return x
+
+
+def rnn_generalized_delta_rule(
+    r,
+    w,
+    k,
+    v,
+    a,
+    b,
+    initial_state=None,
+    output_final_state: bool = True,
+    head_first: bool = False,
+):
+    """
+    Implements the generalized delta rule as a step-by-step RNN scan.
+
+    Parameters:
+        r: Receptance tensor.
+        w: Log-decay tensor.
+        k: Key tensor.
+        v: Value tensor.
+        a, b: Tensors forming the rank-one delta-rule state update.
+        initial_state: Initial state tensor.
+        output_final_state: Whether to also return the final state.
+        head_first: Whether to place the head dimension first during
+            computation.
+
+    Returns:
+        An `(output, final_state)` tuple if output_final_state is True,
+        otherwise only the output.
+    """
+    DTYPE = r.dtype
+    B, T, H, N = ops.shape(r)
+    r = transpose_head(r, head_first)
+
+    k = transpose_head(k, head_first)
+
+    v = transpose_head(v, head_first)
+    a = transpose_head(a, head_first)
+    b = transpose_head(b, head_first)
+    w = transpose_head(w, head_first)
+    w = ops.exp(-ops.exp(w))
+
+    if initial_state is not None:
+        state = initial_state
+        if ops.shape(state)[0] == 1:
+            state = ops.broadcast_to(state, (B, H, N, N))
+    else:
+        state = ops.zeros((B, H, N, N), dtype="float32")
+    out = ops.zeros((B, T, H, N), dtype=r.dtype)
+
+    def step(t, inputs):
+        """
+        Performs computation for a single time step.
+
+        Parameters:
+            t: Current time step.
+            inputs: List containing current state and output.
+
+        Returns:
+            Updated state and output.
+ """ + state, out = inputs + kk = ops.reshape(k[:, t, :], (B, H, 1, N)) + rr = ops.reshape(r[:, t, :], (B, H, N, 1)) + vv = ops.reshape(v[:, t, :], (B, H, N, 1)) + aa = ops.reshape(a[:, t, :], (B, H, N, 1)) + bb = ops.reshape(b[:, t, :], (B, H, 1, N)) + state = state * w[:, t, :, None, :] + state @ aa @ bb + vv @ kk + out = ops.slice_update( + out, [0, t, 0, 0], ops.reshape((state @ rr), (B, 1, H, N)) + ) + return [state, out] + + state, out = ops.fori_loop(0, T, step, [state, out]) + + if output_final_state: + return ops.cast(out, DTYPE), state + return ops.cast(out, DTYPE) + + +class TimeShift(Layer): + def __init__(self, name="time_shift"): + super(TimeShift, self).__init__(name=name) + + def call(self, inputs, cache_x=None): + x = ops.pad(inputs, [[0, 0], [1, 0], [0, 0]], constant_values=0.0)[ + :, :-1, : + ] + if cache_x is not None: + x = ops.slice_update(x, [0, 0, 0], cache_x) + return x + + def compute_output_shape(self, input_shape): + return input_shape + + +class RWKV7_ChannelMix(Layer): + def __init__(self, dim_ffn, kernel_initializer="glorot_uniform", **kwargs): + super().__init__(**kwargs) + self.dim_ffn = dim_ffn + self.kernel_initializer = initializers.get(kernel_initializer) + + def call(self, x, last_cache_x=None): + if last_cache_x is None: + xx = self.time_shift(x) - x + else: + xx = self.time_shift(x, last_cache_x) - x + last_cache_x = x[:, -1:, :] + k = x + xx * self.x_k + k = ops.relu(self.key(k)) ** 2 + output = self.value(k) + if last_cache_x is not None: + output = [output, last_cache_x] + return output + + def compute_output_shape(self, input_shape): + if isinstance(input_shape, list): + return input_shape[0] + return input_shape + + def build(self, input_shape): + super().build(input_shape) + if isinstance(input_shape, list): + input_shape = input_shape[0] + self.x_k = self.add_weight( + shape=(1, 1, input_shape[-1]), + name="time_mix_k", + initializer=self.kernel_initializer, + ) + self.time_shift = TimeShift() + self.key = keras.layers.Dense( + self.dim_ffn, + use_bias=False, + name="dense_k", + kernel_initializer=self.kernel_initializer, + ) + self.value = keras.layers.Dense( + input_shape[-1], + use_bias=False, + name="dense_v", + kernel_initializer=self.kernel_initializer, + ) + self.key.build(input_shape) + self.value.build([None, None, self.dim_ffn]) + + def get_config(self): + config = { + "dim_ffn": self.dim_ffn, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class GroupNorm(keras.layers.GroupNormalization): + def call(self, inputs): + if keras.config.backend() == "torch": + import torch.nn.functional as F + + return F.group_norm( + inputs, self.groups, self.gamma, self.beta, self.epsilon + ) + return super().call(inputs) + + +class RWKV7_TimeMix(Layer): + def __init__( + self, + hidden_size, + head_size, + gate_lora=128, + mv_lora=32, + aaa_lora=64, + decay_lora=64, + kernel_initializer="glorot_uniform", + **kwargs, + ): + super().__init__(**kwargs) + self.head_size = head_size + self.hidden_size = hidden_size + self.n_head = hidden_size // self.head_size + self.gate_lora = gate_lora + self.mv_lora = mv_lora + self.aaa_lora = aaa_lora + self.decay_lora = decay_lora + self.kernel_initializer = initializers.get(kernel_initializer) + self.initial_state = None + try: + from rwkv_ops import RWKV7_USE_KERNEL + from rwkv_ops import generalized_delta_rule + except ImportError: + warnings.warn( + "The 'rwkv_ops' package 
is not installed. "
+                "Falling back to a pure-Python operator, which will be very slow. "
+                "Please install 'rwkv_ops' to enable the optimized kernels.",
+                UserWarning,
+                stacklevel=2,
+            )
+            generalized_delta_rule = rnn_generalized_delta_rule
+            RWKV7_USE_KERNEL = False
+        self.RWKV7_OP, self.USE_KERNEL = (
+            generalized_delta_rule,
+            RWKV7_USE_KERNEL,
+        )
+        assert self.hidden_size % self.n_head == 0
+
+    def build(self, input_shape):
+        super().build(input_shape)
+        if isinstance(input_shape[0], list):
+            input_shape = input_shape[0]
+        H = self.n_head
+        N = self.head_size
+        B, T, C = input_shape
+
+        self.x_r = self.add_weight(
+            shape=(1, 1, C), name="x_r", initializer=self.kernel_initializer
+        )
+        self.x_w = self.add_weight(
+            shape=(1, 1, C), name="x_w", initializer=self.kernel_initializer
+        )
+        self.x_k = self.add_weight(
+            shape=(1, 1, C), name="x_k", initializer=self.kernel_initializer
+        )
+        self.x_v = self.add_weight(
+            shape=(1, 1, C), name="x_v", initializer=self.kernel_initializer
+        )
+        self.x_a = self.add_weight(
+            shape=(1, 1, C), name="x_a", initializer=self.kernel_initializer
+        )
+        self.x_g = self.add_weight(
+            shape=(1, 1, C), name="x_g", initializer=self.kernel_initializer
+        )
+
+        self.w0 = self.add_weight(
+            shape=(1, 1, C), name="w0", initializer=self.kernel_initializer
+        )
+        self.w1 = self.add_weight(
+            shape=(C, self.decay_lora),
+            name="w1",
+            initializer=self.kernel_initializer,
+        )
+        self.w2 = self.add_weight(
+            shape=(self.decay_lora, C),
+            name="w2",
+            initializer=self.kernel_initializer,
+        )
+
+        self.a0 = self.add_weight(
+            shape=(1, 1, C), name="a0", initializer=self.kernel_initializer
+        )
+        self.a1 = self.add_weight(
+            shape=(C, self.aaa_lora),
+            name="a1",
+            initializer=self.kernel_initializer,
+        )
+        self.a2 = self.add_weight(
+            shape=(self.aaa_lora, C),
+            name="a2",
+            initializer=self.kernel_initializer,
+        )
+
+        self.v0 = self.add_weight(
+            shape=(1, 1, C), name="v0", initializer=self.kernel_initializer
+        )
+        self.v1 = self.add_weight(
+            shape=(C, self.mv_lora),
+            name="v1",
+            initializer=self.kernel_initializer,
+        )
+        self.v2 = self.add_weight(
+            shape=(self.mv_lora, C),
+            name="v2",
+            initializer=self.kernel_initializer,
+        )
+
+        self.g1 = self.add_weight(
+            shape=(C, self.gate_lora),
+            name="g1",
+            initializer=self.kernel_initializer,
+        )
+        self.g2 = self.add_weight(
+            shape=(self.gate_lora, C),
+            name="g2",
+            initializer=self.kernel_initializer,
+        )
+
+        self.k_k = self.add_weight(
+            shape=(1, 1, C), name="k_k", initializer=self.kernel_initializer
+        )
+        self.k_a = self.add_weight(
+            shape=(1, 1, C), name="k_a", initializer=self.kernel_initializer
+        )
+        self.r_k = self.add_weight(
+            shape=(H, N), name="r_k", initializer=self.kernel_initializer
+        )
+
+        self.time_shift = TimeShift()
+        self.receptance = keras.layers.Dense(
+            C,
+            use_bias=False,
+            kernel_initializer=self.kernel_initializer,
+            name="receptance",
+        )
+        self.key = keras.layers.Dense(
+            C,
+            use_bias=False,
+            kernel_initializer=self.kernel_initializer,
+            name="key",
+        )
+        self.value = keras.layers.Dense(
+            C,
+            use_bias=False,
+            kernel_initializer=self.kernel_initializer,
+            name="value",
+        )
+        self.output_layer = keras.layers.Dense(
+            C,
+            use_bias=False,
+            kernel_initializer=self.kernel_initializer,
+            name="output_layer",
+        )
+        self.ln_x = GroupNorm(groups=H, epsilon=64e-5)
+
+        self.receptance.build(input_shape)
+        self.value.build(input_shape)
+        self.key.build(input_shape)
+        self.output_layer.build(input_shape)
+        self.ln_x.build((None, C))
+
+    def call(
+        self,
+        x,
+        v_first=None,
+        padding_mask=None,
+
last_cache_x=None,
+        cache_state=None,
+        rnn_mode=False,
+    ):
+        if cache_state is None:
+            initial_state = self.initial_state
+        else:
+            initial_state = cache_state
+        if padding_mask is not None:
+            if ops.ndim(padding_mask) == 2:
+                padding_mask = padding_mask[..., None]
+            padding_mask = ops.cast(padding_mask, x.dtype)
+            x *= padding_mask
+        B, T, C = ops.shape(x)
+        H = self.n_head
+        if last_cache_x is None:
+            xx = self.time_shift(x) - x
+        else:
+            xx = self.time_shift(x, last_cache_x) - x
+            last_cache_x = x[:, -1:, :]
+
+        xr = x + xx * self.x_r
+        xw = x + xx * self.x_w
+        xk = x + xx * self.x_k
+        xv = x + xx * self.x_v
+        xa = x + xx * self.x_a
+        xg = x + xx * self.x_g
+
+        r = self.receptance(xr)
+        w = (
+            -ops.softplus(
+                -(
+                    self.w0
+                    + ops.matmul(ops.tanh(ops.matmul(xw, self.w1)), self.w2)
+                )
+            )
+            - 0.5
+        )  # soft-clamp to (-inf, -0.5)
+        k = self.key(xk)
+        v = self.value(xv)
+        if v_first is None:
+            v_first = v
+        else:
+            v = v + (v_first - v) * ops.sigmoid(
+                self.v0 + ops.matmul(ops.matmul(xv, self.v1), self.v2)
+            )
+
+        a = ops.sigmoid(
+            self.a0 + ops.matmul(ops.matmul(xa, self.a1), self.a2)
+        )  # a is "in-context learning rate"
+        g = ops.matmul(ops.sigmoid(ops.matmul(xg, self.g1)), self.g2)
+
+        kk = k * self.k_k
+
+        kk = self.normalize(ops.reshape(kk, (B, T, H, -1)))
+        kk = ops.reshape(kk, (B, T, C))
+
+        k = k * (1 + (a - 1) * self.k_a)
+        if padding_mask is not None:
+            v *= padding_mask
+            if self.USE_KERNEL:
+                w += (1 - padding_mask) * -1e9
+            else:
+                w = w * padding_mask + 1 - padding_mask
+        if rnn_mode:
+            rwkv7_op = rnn_generalized_delta_rule
+        else:
+            rwkv7_op = self.RWKV7_OP
+        x, final_state = rwkv7_op(
+            ops.reshape(r, (B, T, self.n_head, self.head_size)),
+            ops.reshape(w, (B, T, self.n_head, self.head_size)),
+            ops.reshape(k, (B, T, self.n_head, self.head_size)),
+            ops.reshape(v, (B, T, self.n_head, self.head_size)),
+            ops.reshape(-kk, (B, T, self.n_head, self.head_size)),
+            ops.reshape(kk * a, (B, T, self.n_head, self.head_size)),
+            initial_state=initial_state,
+        )
+
+        x = ops.reshape(x, (B, T, C))
+        x = ops.reshape(self.ln_x(ops.reshape(x, (B * T, C))), (B, T, C))
+
+        r = ops.reshape(r, (B, T, H, -1))
+        k = ops.reshape(k, (B, T, H, -1))
+        v = ops.reshape(v, (B, T, C))
+
+        rwkv = ops.sum(r * k * self.r_k, axis=-1, keepdims=True) * ops.reshape(
+            v, (B, T, H, -1)
+        )
+
+        x = x + ops.reshape(rwkv, (B, T, C))
+        x = self.output_layer(x * g)
+        output = [x, v_first]
+        if last_cache_x is not None:
+            output.extend([last_cache_x, final_state])
+        return output
+
+    def compute_output_shape(self, input_shape):
+        output_shapes = [
+            [None, None, self.hidden_size],
+            [None, None, self.hidden_size],
+        ]
+        return output_shapes
+
+    def normalize(
+        self,
+        x,
+        eps: float = 1e-12,
+    ):
+        # F.normalize-like API
+        if keras.config.backend() == "torch":
+            import torch.nn.functional as F
+
+            return F.normalize(x, dim=-1, p=2.0)
+        square_sum = ops.sum(ops.square(x), axis=-1, keepdims=True)
+        inv_norm = ops.rsqrt(square_sum + eps)
+        inv_norm = ops.maximum(inv_norm, eps)
+        return x * inv_norm
+
+    def get_config(self):
+        config = {
+            "hidden_size": self.hidden_size,
+            "head_size": self.head_size,
+            "gate_lora": self.gate_lora,
+            "mv_lora": self.mv_lora,
+            "aaa_lora": self.aaa_lora,
+            "decay_lora": self.decay_lora,
+            "kernel_initializer": initializers.serialize(
+                self.kernel_initializer
+            ),
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+
+class RWKV7_Block(Layer):
+    def __init__(
+
self, + hidden_size, + head_size, + intermediate_dim, + gate_lora=128, + mv_lora=32, + aaa_lora=64, + decay_lora=64, + use_initial_norm=False, + kernel_initializer="glorot_uniform", + **kwargs, + ): + super().__init__(**kwargs) + self.head_size = head_size + self.hidden_size = hidden_size + self.gate_lora = gate_lora + self.mv_lora = mv_lora + self.aaa_lora = aaa_lora + self.decay_lora = decay_lora + self.intermediate_dim = intermediate_dim + self.use_initial_norm = use_initial_norm + self.kernel_initializer = initializers.get(kernel_initializer) + + def build(self, input_shape): + super().build(input_shape) + if self.use_initial_norm: + self.ln0 = keras.layers.LayerNormalization( + epsilon=1e-5, name="init_norm" + ) + self.ln0.build(input_shape) + + self.ln1 = keras.layers.LayerNormalization( + epsilon=1e-5, name="att_norm" + ) + self.ln1.build(input_shape) + + self.ln2 = keras.layers.LayerNormalization( + epsilon=1e-5, name="ffn_norm" + ) + self.ln2.build(input_shape) + + self.att = RWKV7_TimeMix( + self.hidden_size, + self.head_size, + self.gate_lora, + self.mv_lora, + self.aaa_lora, + self.decay_lora, + name="RWKV_TIME_MIX", + kernel_initializer=self.kernel_initializer, + ) + self.att.build(input_shape) + + self.ffn = RWKV7_ChannelMix( + self.intermediate_dim, + name="RWKV_CMIX", + kernel_initializer=self.kernel_initializer, + ) + self.ffn.build(input_shape) + + def call( + self, + x, + v_first=None, + padding_mask=None, + cache_state=None, + cache_tmix_x=None, + cache_cmix_x=None, + rnn_mode=False, + ): + if self.use_initial_norm: + x = self.ln0(x) + if cache_state is None: + xx, v_first = self.att( + self.ln1(x), v_first=v_first, padding_mask=padding_mask + ) + x = x + xx + x = x + self.ffn(self.ln2(x)) + return x, v_first + else: + xx, v_first, cache_tmix_x, cache_state = self.att( + self.ln1(x), + v_first=v_first, + padding_mask=padding_mask, + last_cache_x=cache_tmix_x, + cache_state=cache_state, + rnn_mode=rnn_mode, + ) + x = x + xx + xx, cache_cmix_x = self.ffn(self.ln2(x), cache_cmix_x) + x = x + xx + return x, v_first, cache_state, cache_tmix_x, cache_cmix_x + + def compute_output_shape(self, input_shape): + output_shapes = [ + [None, None, self.hidden_size], + [None, None, self.hidden_size], + ] + return output_shapes + + def get_config(self): + config = { + "hidden_size": self.hidden_size, + "head_size": self.head_size, + "gate_lora": self.gate_lora, + "mv_lora": self.mv_lora, + "aaa_lora": self.aaa_lora, + "decay_lora": self.decay_lora, + "intermediate_dim": self.intermediate_dim, + "use_initial_norm": self.use_initial_norm, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py new file mode 100644 index 0000000000..ce2e49535a --- /dev/null +++ b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py @@ -0,0 +1,224 @@ +import os + +import keras + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.tokenizers import tokenizer +from keras_hub.src.utils.tensor_utils import is_int_dtype +from keras_hub.src.utils.tensor_utils import is_string_dtype +from keras_hub.src.utils.tensor_utils import tensor_to_list + +VOCAB_FILENAME = "vocab.txt" + + +class TRIE: + __slots__ = tuple("ch,to,values,front".split(",")) + to: list + values: set + + def __init__(self, front=None, ch=None): + self.ch = ch + self.to = [None for ch in range(256)] + 
self.values = set()
+        self.front = front
+
+    def __repr__(self):
+        fr = self
+        ret = []
+        while fr is not None:
+            if fr.ch is not None:
+                ret.append(fr.ch)
+            fr = fr.front
+        return "<TRIE %s %s>" % (ret[::-1], self.values)
+
+    def add(self, key: bytes, idx: int = 0, val=None):
+        if idx == len(key):
+            if val is None:
+                val = key
+            self.values.add(val)
+            return self
+        ch = key[idx]
+        if self.to[ch] is None:
+            self.to[ch] = TRIE(front=self, ch=ch)
+        return self.to[ch].add(key, idx=idx + 1, val=val)
+
+    def find_longest(self, key: bytes, idx: int = 0):
+        u: TRIE = self
+        ch: int = key[idx]
+
+        while u.to[ch] is not None:
+            u = u.to[ch]
+            idx += 1
+            if u.values:
+                ret = idx, u, u.values
+            if idx == len(key):
+                break
+            ch = key[idx]
+        return ret
+
+
+class RWKV_TOKENIZER:
+    def __init__(self, vocabs):
+        self.idx2token = {}
+        sorted = []  # must be already sorted
+        for l in vocabs:
+            idx = int(l[: l.index(" ")])
+            x = eval(l[l.index(" ") : l.rindex(" ")])
+            x = x.encode("utf-8") if isinstance(x, str) else x
+            assert isinstance(x, bytes)
+            assert len(x) == int(l[l.rindex(" ") :])
+            sorted += [x]
+            self.idx2token[idx] = x
+
+        self.token2idx = {}
+        for k, v in self.idx2token.items():
+            self.token2idx[v] = int(k)
+
+        self.root = TRIE()
+        for t, i in self.token2idx.items():
+            _ = self.root.add(t, val=(t, i))
+
+    def encodeBytes(self, src: bytes):
+        idx: int = 0
+        tokens = []
+        while idx < len(src):
+            _idx: int = idx
+            idx, _, values = self.root.find_longest(src, idx)
+            assert idx != _idx
+            _, token = next(iter(values))
+            tokens.append(token)
+        return tokens
+
+    def decodeBytes(self, tokens):
+        return b"".join(map(lambda i: self.idx2token[i], tokens))
+
+    def encode(self, src):
+        if isinstance(src, str):
+            return self.encodeBytes(src.encode("utf-8"))
+        else:
+            return [self.encodeBytes(s.encode("utf-8")) for s in src]
+
+    def decode(self, tokens):
+        return [self.decodeBytes(batch).decode("utf-8") for batch in tokens]
+        # try:
+        #     return self.decodeBytes(tokens).decode('utf-8')
+        # except:
+        #     return '\ufffd' # bad utf-8
+
+    def printTokens(self, tokens):
+        for i in tokens:
+            s = self.idx2token[i]
+            try:
+                s = s.decode("utf-8")
+            except BaseException:
+                pass
+            print(f"{repr(s)}{i}", end=" ")
+        print()
+
+
+@keras_hub_export(
+    [
+        "keras_hub.tokenizers.RWKVTokenizer",
+        "keras_hub.models.RWKVTokenizer",
+    ]
+)
+class RWKVTokenizer(tokenizer.Tokenizer):
+    def __init__(
+        self,
+        vocabulary=None,
+        dtype="int32",
+        **kwargs,
+    ) -> None:
+        if not is_int_dtype(dtype) and not is_string_dtype(dtype):
+            raise ValueError(
+                "Output dtype must be an integer type or a string. "
+                f"Received: dtype={dtype}"
+            )
+
+        super().__init__(dtype=dtype, **kwargs)
+
+        self.vocabulary = None
+        if vocabulary is not None:
+            self.set_vocabulary(vocabulary)
+        self.file_assets = [VOCAB_FILENAME]
+
+    def set_vocabulary(self, vocabulary):
+        self.vocabulary = vocabulary
+        self._tokenizer = RWKV_TOKENIZER(vocabulary)
+        self.pad_token_id = 0
+        self.start_token_id = None
+        self.end_token_id = self.tokenize(["\n\n"])[0][0]
+
+    def save_assets(self, dir_path):
+        path = os.path.join(dir_path, VOCAB_FILENAME)
+        with open(path, "w", encoding="utf-8") as file:
+            file.write("\n".join(self.vocabulary))
+
+    def load_assets(self, dir_path=""):
+        path = os.path.join(dir_path, VOCAB_FILENAME)
+        with open(path, "r", encoding="utf-8") as f:
+            vocabulary = f.readlines()
+        self.set_vocabulary(vocabulary)
+
+    def _check_vocabulary(self):
+        if self.vocabulary is None:
+            raise ValueError(
+                "No vocabulary has been set for RWKVTokenizer. 
Make " + "sure to pass a `vocabulary` argument when creating the layer." + ) + + def vocabulary_size(self): + self._check_vocabulary() + return int(len(self.vocabulary)) + + def get_vocabulary(self): + self._check_vocabulary() + return tensor_to_list(self.vocabulary) + + def id_to_token(self, id): + self._check_vocabulary() + if id >= self.vocabulary_size() or id < 0: + raise ValueError( + f"`id` must be in range [0, {self.vocabulary_size() - 1}]. " + f"Received: {id}" + ) + return self._tokenizer.idx2token[id] + + def token_to_id(self, token): + """Convert a string token to an integer id.""" + self._check_vocabulary() + return int(self._tokenizer.token2idx[token]) + + def get_config(self): + config = super().get_config() + config.update( + { + "vocabulary": None, # Save vocabulary via an asset! + } + ) + return config + + def tokenize(self, inputs): + self._check_vocabulary() + tokens = self._tokenizer.encode(inputs) + + def tokens2ids(x): + return [self.token_to_id(t) for t in x] + + if is_string_dtype(self.dtype): + if isinstance(inputs, str): + return tokens2ids(tokens) + return [tokens2ids(t) for t in tokens] + return tokens + + def detokenize(self, inputs): + self._check_vocabulary() + return self._tokenizer.decode(inputs) + + def compute_output_spec(self, input_spec): + return keras.KerasTensor( + input_spec.shape + (None,), dtype=self.compute_dtype + ) + + def call(self, inputs): + return self.tokenize(inputs) diff --git a/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py new file mode 100644 index 0000000000..e51e0c4d79 --- /dev/null +++ b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py @@ -0,0 +1,464 @@ +# ============================================================================== +# Environment & Dependency Setup +# ============================================================================== +import os + +import numpy as np +import requests +import torch +from absl import app +from absl import flags + +# Force CPU only (GPU index -1 disables CUDA) +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" +# Use native kernel implementations +os.environ["KERNEL_TYPE"] = "native" + +# Keras-Ops is imported **after** environment variables are set +import types + +import torch.nn as nn +import torch.nn.functional as F +from keras import ops # noqa: E402 +from modelscope import snapshot_download + +from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone +from keras_hub.src.models.rwkv7.rwkv7_casual_lm import RWKV7CausalLM + +# Local modules +from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer + +# ============================================================================== +# Model Preset Registry +# ============================================================================== +PRESET_MAP = { + "rwkv7_world_0.1B": "RWKV-x070-World-0.1B-v2.8-20241210-ctx4096.pth", + "rwkv7_world_0.3B": "RWKV-x070-World-0.4B-v2.9-20250107-ctx4096.pth", + "rwkv7_world_1.5B": "RWKV-x070-World-1.5B-v3-20250127-ctx4096.pth", + "rwkv7_world_2.9B": "RWKV-x070-World-2.9B-v3-20250211-ctx4096.pth", +} + +# ============================================================================== +# Command-line Interface +# ============================================================================== +FLAGS = flags.FLAGS +flags.DEFINE_string( + "preset", None, f"Must be one of {','.join(PRESET_MAP.keys())}" +) + +# ============================================================================== +# RWKV-v7 official PyTorch 
implementation +# From https://github.com/BlinkDL/RWKV-LM/blob/main/RWKV-v7/rwkv_v7_demo.py +# ============================================================================== +HEAD_SIZE = 64 +D_DECAY_LORA = 64 +D_AAA_LORA = 64 +D_MV_LORA = 32 +D_GATE_LORA = 128 + + +def RWKV7_OP(r, w, k, v, a, b): + """ + Official RWKV-7 core operator. + Performs the time-mix recurrence with delta-rule based learning. + """ + DTYPE = r.dtype + B, T, C = r.size() + H = C // HEAD_SIZE + N = HEAD_SIZE + r = r.view(B, T, H, N).float() + k = k.view(B, T, H, N).float() + v = v.view(B, T, H, N).float() + a = a.view(B, T, H, N).float() + b = b.view(B, T, H, N).float() + + # Compute decay factor (log-space) + w = torch.exp(-torch.exp(w.view(B, T, H, N).float())) + out = torch.zeros((B, T, H, N), device=r.device, dtype=torch.float) + state = torch.zeros((B, H, N, N), device=r.device, dtype=torch.float) + + # Recurrent inference loop over time + for t in range(T): + kk = k[:, t, :].view(B, H, 1, N) + rr = r[:, t, :].view(B, H, N, 1) + vv = v[:, t, :].view(B, H, N, 1) + aa = a[:, t, :].view(B, H, N, 1) + bb = b[:, t, :].view(B, H, 1, N) + # State update: decay + delta-rule + residual + state = state * w[:, t, :, None, :] + state @ aa @ bb + vv @ kk + # Read-out for current position + out[:, t, :] = (state @ rr).view(B, H, N) + return out.view(B, T, C).to(DTYPE) + + +# ============================================================================== +# RWKV Time-Mix Layer (Attention) +# ============================================================================== +class RWKV_Tmix_x070(nn.Module): + def __init__(self, args, layer_id): + super().__init__() + self.args = args + self.layer_id = layer_id + self.head_size = args.head_size_a + self.n_head = args.dim_att // self.head_size + assert args.dim_att % self.n_head == 0 + + H, N, C = self.n_head, self.head_size, args.n_embd + + # Low-rank adaptation & shift scalars + self.x_r = nn.Parameter(torch.empty(1, 1, C)) + self.x_w = nn.Parameter(torch.empty(1, 1, C)) + self.x_k = nn.Parameter(torch.empty(1, 1, C)) + self.x_v = nn.Parameter(torch.empty(1, 1, C)) + self.x_a = nn.Parameter(torch.empty(1, 1, C)) + self.x_g = nn.Parameter(torch.empty(1, 1, C)) + + # Decay (w) modulation + self.w0 = nn.Parameter(torch.empty(1, 1, C)) + self.w1 = nn.Parameter(torch.empty(C, D_DECAY_LORA)) + self.w2 = nn.Parameter(torch.empty(D_DECAY_LORA, C)) + + # In-context learning rate (a) modulation + self.a0 = nn.Parameter(torch.empty(1, 1, C)) + self.a1 = nn.Parameter(torch.empty(C, D_AAA_LORA)) + self.a2 = nn.Parameter(torch.empty(D_AAA_LORA, C)) + + # Value residual modulation + self.v0 = nn.Parameter(torch.empty(1, 1, C)) + self.v1 = nn.Parameter(torch.empty(C, D_MV_LORA)) + self.v2 = nn.Parameter(torch.empty(D_MV_LORA, C)) + + # Gate modulation + self.g1 = nn.Parameter(torch.empty(C, D_GATE_LORA)) + self.g2 = nn.Parameter(torch.empty(D_GATE_LORA, C)) + + # Normalization & positional factors + self.k_k = nn.Parameter(torch.empty(1, 1, C)) + self.k_a = nn.Parameter(torch.empty(1, 1, C)) + self.r_k = nn.Parameter(torch.empty(H, N)) + + self.time_shift = nn.ZeroPad2d((0, 0, 1, -1)) + self.receptance = nn.Linear(C, C, bias=False) + self.key = nn.Linear(C, C, bias=False) + self.value = nn.Linear(C, C, bias=False) + self.output = nn.Linear(C, C, bias=False) + # GroupNorm with very small epsilon for numerical stability + self.ln_x = nn.GroupNorm(H, C, eps=64e-5) + + # -------------------------------------------------------------------------- + def forward(self, x, v_first=None): + B, T, C = x.size() 
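+        # Shape conventions in this reference module: B=batch, T=sequence
+        # length, C=n_embd, split into H heads of size N (C == H * N).
+        # `time_shift` (nn.ZeroPad2d((0, 0, 1, -1))) shifts the sequence right
+        # by one token, so `xx` below is each token's predecessor minus the
+        # token itself, and the learned `x_*` vectors blend the two per branch.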
+ H = self.n_head + xx = self.time_shift(x) - x # Difference token shift + + # Apply token-shift to each branch + xr = x + xx * self.x_r + xw = x + xx * self.x_w + xk = x + xx * self.x_k + xv = x + xx * self.x_v + xa = x + xx * self.x_a + xg = x + xx * self.x_g + + r = self.receptance(xr) + w = ( + -F.softplus(-(self.w0 + torch.tanh(xw @ self.w1) @ self.w2)) - 0.5 + ) # Clamp + k = self.key(xk) + v = self.value(xv) + + # Value residual: only active on non-first layers + if self.layer_id == 0: + v_first = v + else: + v = v + (v_first - v) * torch.sigmoid( + self.v0 + (xv @ self.v1) @ self.v2 + ) + + a = torch.sigmoid(self.a0 + (xa @ self.a1) @ self.a2) # In-context LR + g = torch.sigmoid(xg @ self.g1) @ self.g2 # Gate + + # Normalize keys for stability + kk = k * self.k_k + kk = F.normalize(kk.view(B, T, H, -1), dim=-1, p=2.0).view(B, T, C) + k = k * (1 + (a - 1) * self.k_a) + + # Core recurrence + x = RWKV7_OP(r, w, k, v, -kk, kk * a).to(r.dtype) + x = self.ln_x(x.view(B * T, C)).view(B, T, C) + + # Additional local mix (receptance * key * r_k) * value + x = x + ( + (r.view(B, T, H, -1) * k.view(B, T, H, -1) * self.r_k).sum( + dim=-1, keepdim=True + ) + * v.view(B, T, H, -1) + ).view(B, T, C) + x = self.output(x * g) + return x, v_first + + +# ============================================================================== +# RWKV Channel-Mix Layer (Feed-Forward) +# ============================================================================== +class RWKV_CMix_x070(nn.Module): + def __init__(self, args, layer_id): + super().__init__() + self.args = args + self.layer_id = layer_id + self.time_shift = nn.ZeroPad2d((0, 0, 1, -1)) + with torch.no_grad(): + self.x_k = nn.Parameter(torch.empty(1, 1, args.n_embd)) + + self.key = nn.Linear(args.n_embd, args.dim_ffn, bias=False) + self.value = nn.Linear(args.dim_ffn, args.n_embd, bias=False) + + def forward(self, x): + xx = self.time_shift(x) - x + k = x + xx * self.x_k + k = torch.relu(self.key(k)) ** 2 # Squared ReLU + return self.value(k) + + +# ============================================================================== +# RWKV Building Block (Time-Mix + Channel-Mix + Norms) +# ============================================================================== +class Block(nn.Module): + def __init__(self, args, layer_id): + super().__init__() + self.args = args + self.layer_id = layer_id + self.ln0 = nn.LayerNorm(args.n_embd) if layer_id == 0 else None + self.ln1 = nn.LayerNorm(args.n_embd) + self.ln2 = nn.LayerNorm(args.n_embd) + + self.att = RWKV_Tmix_x070(args, layer_id) + self.ffn = RWKV_CMix_x070(args, layer_id) + + def forward(self, x, v_first): + if self.layer_id == 0: + x = self.ln0(x) + xx, v_first = self.att(self.ln1(x), v_first) + x = x + xx + x = x + self.ffn(self.ln2(x)) + return x, v_first + + +# ============================================================================== +# Full RWKV Model +# ============================================================================== +class RWKV(nn.Module): + def __init__(self, args): + super().__init__() + args.dim_att = args.n_embd + args.dim_ffn = args.n_embd * 4 + self.emb = nn.Embedding(args.vocab_size, args.n_embd) + + self.blocks = nn.ModuleList( + [Block(args, i) for i in range(args.n_layer)] + ) + self.ln_out = nn.LayerNorm(args.n_embd) + self.head = nn.Linear(args.n_embd, args.vocab_size, bias=False) + + def forward(self, idx): + x = self.emb(idx) + v_first = torch.empty_like(x) + for block in self.blocks: + x, v_first = block(x, v_first) + x = self.ln_out(x) + x = self.head(x) + return 
x
+
+
+# ==============================================================================
+# Weight Conversion Utilities (PyTorch ↔ Keras)
+# ==============================================================================
+def convert_cmix(my_channel_mix, weights, i):
+    my_channel_mix.set_weights(
+        [
+            weights.pop("blocks.%d.ffn.x_k" % i),
+            weights.pop("blocks.%d.ffn.key.weight" % i).T,
+            weights.pop("blocks.%d.ffn.value.weight" % i).T,
+        ]
+    )
+
+
+def convert_tmix(my_time_mix, weights, i):
+    weights_list = [
+        weights.pop("blocks.%d.att.x_r" % i),
+        weights.pop("blocks.%d.att.x_w" % i),
+        weights.pop("blocks.%d.att.x_k" % i),
+        weights.pop("blocks.%d.att.x_v" % i),
+        weights.pop("blocks.%d.att.x_a" % i),
+        weights.pop("blocks.%d.att.x_g" % i),
+        weights.pop("blocks.%d.att.w0" % i),
+        weights.pop("blocks.%d.att.w1" % i),
+        weights.pop("blocks.%d.att.w2" % i),
+        weights.pop("blocks.%d.att.a0" % i),
+        weights.pop("blocks.%d.att.a1" % i),
+        weights.pop("blocks.%d.att.a2" % i),
+        weights.pop("blocks.%d.att.v0" % i),
+        weights.pop("blocks.%d.att.v1" % i),
+        weights.pop("blocks.%d.att.v2" % i),
+        weights.pop("blocks.%d.att.g1" % i),
+        weights.pop("blocks.%d.att.g2" % i),
+        weights.pop("blocks.%d.att.k_k" % i),
+        weights.pop("blocks.%d.att.k_a" % i),
+        weights.pop("blocks.%d.att.r_k" % i),
+        weights.pop("blocks.%d.att.receptance.weight" % i).T,
+        weights.pop("blocks.%d.att.key.weight" % i).T,
+        weights.pop("blocks.%d.att.value.weight" % i).T,
+        weights.pop("blocks.%d.att.output.weight" % i).T,
+        weights.pop("blocks.%d.att.ln_x.weight" % i),
+        weights.pop("blocks.%d.att.ln_x.bias" % i),
+    ]
+    my_time_mix.set_weights(weights_list)
+
+
+def convert_layernorm(myln, weights, ln_id, layer_id):
+    myln.set_weights(
+        [
+            weights.pop("blocks.%d.ln%d.weight" % (layer_id, ln_id)),
+            weights.pop("blocks.%d.ln%d.bias" % (layer_id, ln_id)),
+        ]
+    )
+
+
+def convert_block(my_block, weights, i):
+    convert_cmix(my_block.ffn, weights, i)
+    convert_tmix(my_block.att, weights, i)
+    if my_block.use_initial_norm:
+        convert_layernorm(my_block.ln0, weights, 0, i)
+    convert_layernorm(my_block.ln1, weights, 1, i)
+    convert_layernorm(my_block.ln2, weights, 2, i)
+
+
+def convert_backbone(my_backbone, weights):
+    # Convenience wrapper that mirrors the inline conversion performed in
+    # `convert_rwkv7_checkpoints` below.
+    for i in range(my_backbone.num_layers):
+        convert_block(my_backbone.rwkv_layers[i], weights, i)
+    my_backbone.token_embedding.set_weights([weights.pop("emb.weight")])
+    my_backbone.output_layer_norm.set_weights(
+        [
+            weights.pop("ln_out.weight"),
+            weights.pop("ln_out.bias"),
+        ]
+    )
+
+
+# ==============================================================================
+# Checkpoint Conversion Entry Point
+# ==============================================================================
+def convert_rwkv7_checkpoints(weights_path):
+    weights = torch.load(weights_path, map_location="cpu")
+    weights = {k: v.float().numpy() for k, v in weights.items()}
+    w = weights
+    n_layer = 0
+    for k in w.keys():
+        layer_id = int(k.split(".")[1]) if ("blocks." 
in k) else 0
+        n_layer = max(n_layer, layer_id + 1)
+
+    config = {
+        "hidden_size": w["emb.weight"].shape[1],
+        "num_layers": n_layer,
+        "intermediate_dim": w["blocks.0.ffn.key.weight"].shape[0],
+        "vocabulary_size": 65536,
+        "head_size": 64,
+    }
+    my_backbone = RWKV7Backbone(**config)
+
+    # Copy layer-1 value-residual params to layer-0 (compatibility)
+    weights["blocks.0.att.v0"] = weights["blocks.1.att.v0"]
+    weights["blocks.0.att.v1"] = weights["blocks.1.att.v1"]
+    weights["blocks.0.att.v2"] = weights["blocks.1.att.v2"]
+
+    my_backbone.get_layer("token_embedding").set_weights(
+        [weights.pop("emb.weight")]
+    )
+    for i in range(config["num_layers"]):
+        my_block = my_backbone.get_layer(f"rwkv_layer_{i}")
+        convert_block(my_block, weights, i)
+
+    my_backbone.output_layer_norm.set_weights(
+        [
+            weights.pop("ln_out.weight"),
+            weights.pop("ln_out.bias"),
+        ]
+    )
+    model = RWKV7CausalLM(my_backbone)
+    my_backbone.head.set_weights([weights.pop("head.weight").T])
+    return model
+
+
+# ==============================================================================
+# Main Script
+# ==============================================================================
+url = "https://raw.githubusercontent.com/BlinkDL/RWKV-LM/main/RWKV-v7/rwkv_vocab_v20230424.txt"
+
+
+def main(_):
+    if not os.path.exists(FLAGS.preset):
+        os.makedirs(FLAGS.preset)
+
+    source_model_name = PRESET_MAP[FLAGS.preset]
+
+    # Download vocabulary file
+    vocabs = requests.get(url, timeout=30).text
+    with open(
+        os.path.join(FLAGS.preset, "vocab.txt"), "w", encoding="utf-8"
+    ) as f:
+        f.write(vocabs)
+    tokenizer = RWKVTokenizer(FLAGS.preset)
+    tokenizer.load_assets()
+
+    # Download checkpoint
+    download_path = snapshot_download(
+        repo_id="Blink_DL/rwkv-7-world",
+        allow_patterns=source_model_name,
+    )
+    weights_path = os.path.join(download_path, source_model_name)
+
+    # Convert to Keras format
+    my_model = convert_rwkv7_checkpoints(weights_path)
+
+    # Re-build PyTorch reference model
+    args = types.SimpleNamespace()
+    args.n_layer = my_model.backbone.num_layers
+    args.n_embd = my_model.backbone.hidden_size
+    args.vocab_size = my_model.backbone.vocabulary_size
+    args.head_size_a = 64
+    args.dim_att = args.n_embd
+    args.dim_ffn = my_model.backbone.intermediate_dim
+
+    if os.environ["CUDA_VISIBLE_DEVICES"] != "-1":
+        standard_model = RWKV(args).cuda()
+    else:
+        standard_model = RWKV(args)
+
+    weights = torch.load(weights_path, map_location="cpu")
+    standard_model.load_state_dict(weights, strict=False)
+
+    # Sanity check: tokenize & compare outputs
+    x = tokenizer(["i love u"])
+    x = np.reshape(x, [1, -1])
+    my_output = my_model(ops.convert_to_tensor(x, "int32"))
+    xx = torch.from_numpy(x).int()
+    if torch.cuda.is_available():
+        xx = xx.cuda()
+    standard_output = standard_model(xx)
+
+    standard_output = standard_output.cpu().float().detach().numpy()
+    my_output = ops.convert_to_numpy(ops.cast(my_output, "float32"))
+
+    try:
+        np.testing.assert_allclose(my_output, standard_output, atol=1e-4)
+    except AssertionError as err:
+        print("\n")
+        print(err.args[0])
+        print("\n")
+
+    # Export final Keras model
+    my_model.backbone.save_to_preset(f"./{FLAGS.preset}")
+
+
+# ==============================================================================
+# Entry Guard
+# ==============================================================================
+if __name__ == "__main__":
+    flags.mark_flag_as_required("preset")
+    app.run(main)

From 7bc36b59921d8aa5dbffda4937ed331099e136e7 Mon Sep 17 00:00:00 2001
From: pass_lin <935499957@qq.com>
Date: Sun, 28 
Sep 2025 22:52:26 +0800 Subject: [PATCH 02/10] fix --- tools/checkpoint_conversion/convert_rwkv7_checkpoints.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py index e51e0c4d79..e4d5f00d5e 100644 --- a/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py +++ b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py @@ -403,8 +403,8 @@ def main(_): os.path.join(FLAGS.preset, "vocab.txt"), "w", encoding="utf-8" ) as f: f.write(vocabs) - tokenizer = RWKVTokenizer(FLAGS.preset) - tokenizer.load_assets() + tokenizer = RWKVTokenizer() + tokenizer.load_assets(FLAGS.preset) # Download checkpoint download_path = snapshot_download( From 7d4a7a1c48c382cf110189a37487d60014bd64d9 Mon Sep 17 00:00:00 2001 From: pass_lin <935499957@qq.com> Date: Sun, 28 Sep 2025 23:04:59 +0800 Subject: [PATCH 03/10] fix --- tools/checkpoint_conversion/convert_rwkv7_checkpoints.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py index e4d5f00d5e..5c4b76467e 100644 --- a/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py +++ b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py @@ -447,6 +447,7 @@ def main(_): try: np.testing.assert_allclose(my_output, standard_output, atol=1e-4) + print("Successfully passed the numerical verification! 🎯✅📊") except AssertionError as err: print("\n") print(err.args[0]) From e5bb446e582f7149960c384d06e89b619c940b20 Mon Sep 17 00:00:00 2001 From: pass_lin <935499957@qq.com> Date: Tue, 7 Oct 2025 23:15:10 +0800 Subject: [PATCH 04/10] add inference --- keras_hub/api/models/__init__.py | 2 +- keras_hub/src/models/rwkv7/rwkv7_backbone.py | 1 + keras_hub/src/models/rwkv7/rwkv7_casual_lm.py | 50 ----- keras_hub/src/models/rwkv7/rwkv7_causal_lm.py | 182 ++++++++++++++++++ .../rwkv7/rwkv7_causal_lm_preprocessor.py | 59 ++++-- keras_hub/src/models/rwkv7/rwkv7_layer.py | 181 ++++++++--------- keras_hub/src/models/rwkv7/rwkv7_tokenizer.py | 17 +- .../convert_rwkv7_checkpoints.py | 13 +- 8 files changed, 324 insertions(+), 181 deletions(-) delete mode 100644 keras_hub/src/models/rwkv7/rwkv7_casual_lm.py create mode 100644 keras_hub/src/models/rwkv7/rwkv7_causal_lm.py diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 403bfb65ad..714b0e64ad 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -430,7 +430,7 @@ RoformerV2Tokenizer, ) from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone -from keras_hub.src.models.rwkv7.rwkv7_casual_lm import RWKV7CausalLM +from keras_hub.src.models.rwkv7.rwkv7_causal_lm import RWKV7CausalLM from keras_hub.src.models.rwkv7.rwkv7_causal_lm_preprocessor import ( RWKV7CausalLMPreprocessor, ) diff --git a/keras_hub/src/models/rwkv7/rwkv7_backbone.py b/keras_hub/src/models/rwkv7/rwkv7_backbone.py index d6d3d9a36b..3c3fcc85b8 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_backbone.py +++ b/keras_hub/src/models/rwkv7/rwkv7_backbone.py @@ -90,6 +90,7 @@ def __init__( dtype=dtype, **kwargs, ) + self.call(ops.ones([1, 16], "int32")) self.num_layers = num_layers self.head_size = head_size diff --git a/keras_hub/src/models/rwkv7/rwkv7_casual_lm.py b/keras_hub/src/models/rwkv7/rwkv7_casual_lm.py deleted file mode 100644 index c78154129b..0000000000 --- a/keras_hub/src/models/rwkv7/rwkv7_casual_lm.py +++ /dev/null @@ -1,50 +0,0 @@ -from 
keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.models.causal_lm import CausalLM
-from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone
-from keras_hub.src.models.rwkv7.rwkv7_causal_lm_preprocessor import (
-    RWKV7CausalLMPreprocessor,
-)
-
-
-@keras_hub_export("keras_hub.models.RWKV7CausalLM")
-class RWKV7CausalLM(CausalLM):
-    backbone_cls = RWKV7Backbone
-    preprocessor_cls = RWKV7CausalLMPreprocessor
-
-    def __init__(self, backbone, preprocessor=None, **kwargs):
-        # === Layers ===
-        self.backbone = backbone
-        self.preprocessor = preprocessor
-        super().__init__(
-            inputs=backbone.inputs,
-            outputs=backbone.outputs,
-            **kwargs,
-        )
-
-    def call_with_cache(
-        self,
-        token_ids,
-        cache,
-        cache_update_index,
-    ):
-        pass  # TODO
-
-    def _build_cache(self, token_ids):
-        pass  # TODO
-
-    def generate_step(
-        self,
-        inputs,
-        stop_token_ids=None,
-    ):
-        pass  # TODO
-
-    def score(
-        self,
-        token_ids,
-        padding_mask=None,
-        scoring_mode="logits",
-        layer_intercept_fn=None,
-        target_ids=None,
-    ):
-        pass  # TODO
diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py b/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py
new file mode 100644
index 0000000000..b19ce735c2
--- /dev/null
+++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py
@@ -0,0 +1,182 @@
+from keras import ops
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.causal_lm import CausalLM
+from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone
+from keras_hub.src.models.rwkv7.rwkv7_causal_lm_preprocessor import (
+    RWKV7CausalLMPreprocessor,
+)
+from keras_hub.src.utils.tensor_utils import any_equal
+
+
+@keras_hub_export("keras_hub.models.RWKV7CausalLM")
+class RWKV7CausalLM(CausalLM):
+    backbone_cls = RWKV7Backbone
+    preprocessor_cls = RWKV7CausalLMPreprocessor
+
+    def __init__(self, backbone, preprocessor=None, **kwargs):
+        # === Layers ===
+        self.backbone = backbone
+        self.preprocessor = preprocessor
+        super().__init__(
+            inputs=backbone.inputs,
+            outputs=backbone.outputs,
+            **kwargs,
+        )
+        self.call(ops.ones([1, 16], "int32"))
+
+    def call_with_cache(
+        self,
+        token_ids,
+        cache,
+        compute_head=True,
+        padding_mask=None,
+        rnn_mode=True,
+    ):
+        state_cache, last_token_cache = cache
+        x = self.backbone.token_embedding(token_ids)
+        if padding_mask is None:
+            padding_mask = ops.not_equal(token_ids, 0)
+        v_first = None
+        updated_state_cache = []
+        updated_last_token_cache = []
+
+        for i in range(self.backbone.num_layers):
+            current_state_cache = state_cache[:, i, ...]
+            current_token_cache = last_token_cache[:, i, ...]
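+            # Per-layer cache slices (shapes defined in `_build_cache`):
+            #   current_state_cache: [batch, num_heads, head_size, head_size],
+            #     the recurrent WKV state of this layer.
+            #   current_token_cache: [batch, 2, 1, hidden_size], the last
+            #     input token seen by the time-mix (index 0) and channel-mix
+            #     (index 1) token-shift branches.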
+ x, v_first, new_cache_state, cache_tmix_x, cache_cmix_x = ( + self.backbone.rwkv_layers[i].call( + x, + v_first=v_first, + padding_mask=padding_mask, + cache_state=current_state_cache, + cache_tmix_x=current_token_cache[:, 0], + cache_cmix_x=current_token_cache[:, 1], + rnn_mode=rnn_mode, + train_mode=False, + ) + ) + new_token_cache = ops.stack([cache_tmix_x, cache_cmix_x], axis=1) + updated_state_cachce.append(new_cache_state) + updated_last_token_cache.append(new_token_cache) + cache = [ + ops.stack(updated_state_cachce, axis=1), + ops.stack(updated_last_token_cache, axis=1), + ] + hidden_states = x = self.backbone.output_layer_norm(x) + if compute_head: + logits = self.backbone.head(x) + else: + logits = None + return logits, hidden_states, cache + + def _build_cache(self, token_ids): + """Build an empty cache for use with `call_with_cache()`.""" + batch_size = ops.shape(token_ids)[0] + num_layers = self.backbone.num_layers + head_dim = self.backbone.head_size + hidden_size = self.backbone.hidden_size + num_heads = hidden_size // head_dim + + state_cachce = ops.zeros( + [batch_size, num_layers, num_heads, head_dim, head_dim], + dtype=self.compute_dtype, + ) + last_token_cache = ops.zeros( + [batch_size, num_layers, 2, 1, hidden_size], + dtype=self.compute_dtype, + ) + cache = [state_cachce, last_token_cache] + + # Seed the cache. + # prefill阶段可以使用kernel,要快一点 + _, hidden_states, cache = self.call_with_cache( + token_ids, + cache, + rnn_mode=False, + compute_head=False, + ) + + return hidden_states, cache + + def generate_step( + self, + inputs, + stop_token_ids=None, + ): + """A compilable generation function for a single batch of inputs. + + This function represents the inner, XLA-compilable, generation function + for a single batch of inputs. Inputs should have the same structure as + model inputs, a dictionary with keys `"token_ids"` and `"padding_mask"`. + + Args: + inputs: A dictionary with two keys `"token_ids"` and + `"padding_mask"` and batched tensor values. + stop_token_ids: Tuple of id's of the end token to stop on. If all + sequences have produced a new stop token, generation + will stop. + """ + token_ids, padding_mask, predict_token_ids = ( + inputs["token_ids"], + inputs["padding_mask"], + inputs["predict_token_ids"], + ) + # Create and seed cache with a single forward pass. + + hidden_states, cache = self._build_cache(token_ids) + + def next(prompt, cache, index): + # The cache index is the index of our previous token. + cache_update_index = index - 1 + batch_size = ops.shape(prompt)[0] + prompt = ops.slice(prompt, [0, cache_update_index], [batch_size, 1]) + logits, hidden_states, cache = self.call_with_cache( + prompt, + cache, + ) + return ( + ops.squeeze(logits, axis=1), + ops.squeeze(hidden_states, axis=1), + cache, + ) + + output_ids = self.sampler( + next=next, + prompt=predict_token_ids, + cache=cache, + index=1, + mask=padding_mask, + stop_token_ids=stop_token_ids, + hidden_states=hidden_states, + model=self, + ) + padding_mask = ops.concatenate( + [ + ops.cast(ops.not_equal(token_ids, 0), padding_mask.dtype), + padding_mask, + ], + axis=1, + ) + token_ids = ops.concatenate([token_ids, output_ids], axis=1) + + # Compute an output padding mask with the token ids we updated. + if stop_token_ids is not None: + # Build a mask of stop token locations not in the original + # prompt (not in locations where `padding_mask` is True). 
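A worked NumPy example (illustrative) of the cumsum trick that the next few lines implement:

```python
import numpy as np

end_locations = np.array([0, 0, 1, 0, 0])  # stop token generated at index 2
cumsum = np.cumsum(end_locations)          # [0, 0, 1, 1, 1]
overflow = cumsum - end_locations          # [0, 0, 0, 1, 1]
padding_mask = ~overflow.astype(bool)      # keep up to and including the stop
print(padding_mask)                        # [ True  True  True False False]
```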
+ end_locations = any_equal( + token_ids, stop_token_ids, ops.logical_not(padding_mask) + ) + end_locations = ops.cast(end_locations, "int32") + # Use cumsum to get ones in all locations after end_locations. + cumsum = ops.cast(ops.cumsum(end_locations, axis=-1), "int32") + overflow = cumsum - end_locations + # Our padding mask is the inverse of these overflow locations. + padding_mask = ops.logical_not(ops.cast(overflow, "bool")) + else: + # Without early stopping, all locations will have been updated. + padding_mask = ops.ones_like(token_ids, dtype="bool") + return { + "token_ids": token_ids, + "padding_mask": padding_mask, + } diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py index 9a8a88211c..064e143c9e 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py +++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py @@ -1,10 +1,11 @@ import keras +from keras import ops from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer -from keras_hub.src.utils.tensor_utils import strip_to_ragged @keras_hub_export("keras_hub.models.RWKV7CausalLMPreprocessor") @@ -30,20 +31,33 @@ def call( sequence_length=None, ): sequence_length = sequence_length or self.sequence_length + # padding 长度到16的倍数,适应kernel的需求 + sequence_length = sequence_length + (16 - sequence_length % 16) x = self.tokenizer(x) - # Pad with one extra token to account for the truncation below. + token_ids, padding_mask = self.packer( - x, - sequence_length=sequence_length + 1, - add_start_value=self.add_start_token, - add_end_value=self.add_end_token, + x, sequence_length=sequence_length, add_end_value=False ) + # The last token does not have a next token, so we truncate it out. x = token_ids[..., :-1] # Target `y` will be the next token. y, sample_weight = token_ids[..., 1:], padding_mask[..., 1:] return keras.utils.pack_x_y_sample_weight(x, y, sample_weight) + def build(self, input_shape): + # Defer packer creation to `build()` so that we can be sure tokenizer + # assets have loaded when restoring a saved model. 
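One note on the rounding used in `call()` above: `n + (16 - n % 16)` always adds a full block when `n` is already a multiple of 16. A quick check (illustrative):

```python
def round_up(n, multiple=16):
    return n + (multiple - n % multiple)

assert round_up(10) == 16
assert round_up(17) == 32
assert round_up(32) == 48  # an exact multiple is still bumped a full block

# If exact multiples should pass through unchanged, the usual alternative
# is n + (-n % 16), which maps 32 -> 32.
```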
+ self.packer = StartEndPacker( + start_value=None, + end_value=None, + pad_value=self.tokenizer.pad_token_id, + sequence_length=self.sequence_length, + return_padding_mask=True, + padding_side="left", + ) + self.built = True + def generate_preprocess( self, x, @@ -62,12 +76,33 @@ def generate_preprocess( """ if not self.built: self.build(None) + # 这么做的目的是为了对齐keras的api + # 输入的sequence_length是生成的最大长度 + # 而本身sequence_length则对应于prefill的最大长度 + generate_length = sequence_length + sequence_length = self.sequence_length - x = self.tokenizer(x) - token_ids, padding_mask = self.packer( + # padding 长度到16的倍数,适应kernel的需求 + sequence_length = sequence_length + (16 - sequence_length % 16) + generate_length = generate_length + (16 - generate_length % 16) + + x = [t[-sequence_length:] for t in self.tokenizer(x)] + y = ops.zeros((len(x), generate_length), "int32") + start_token = [[t[-1]] for t in x] + x = [t[:-1] if len(t) > 1 else [0] for t in x] + + token_ids, __ = self.packer( x, sequence_length=sequence_length, add_end_value=False ) - return token_ids + start_token = ops.convert_to_tensor(start_token, "int32") + y = ops.slice_update(y, [0, 0], start_token) + padding_mask = ops.not_equal(y, 0) + + return { + "token_ids": token_ids, + "padding_mask": padding_mask, + "predict_token_ids": y, + } def generate_postprocess( self, @@ -83,6 +118,6 @@ def generate_postprocess( self.build(None) token_ids, padding_mask = x["token_ids"], x["padding_mask"] - ids_to_strip = self.tokenizer.special_token_ids - token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip) - return self.tokenizer.detokenize(token_ids) + token_ids = ops.convert_to_numpy(token_ids) + padding_mask = ops.convert_to_numpy(padding_mask) + return self.tokenizer.detokenize(token_ids * padding_mask) diff --git a/keras_hub/src/models/rwkv7/rwkv7_layer.py b/keras_hub/src/models/rwkv7/rwkv7_layer.py index 217f42ee32..0df1c14322 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_layer.py +++ b/keras_hub/src/models/rwkv7/rwkv7_layer.py @@ -1,23 +1,12 @@ -import warnings - import keras from keras import initializers from keras import ops from keras.layers import Layer +import warnings def transpose_head(x, head_first): - """ - Transpose the input tensor. - - Parameters: - x: Input tensor. - head_first: Boolean flag indicating whether to transpose. - - Returns: - Transposed tensor if head_first is True, otherwise the original tensor. - """ - x = ops.cast(x, "float32") + x = ops.cast(x, dtype="float32") if head_first: return ops.transpose(x, (0, 2, 1, 3)) else: @@ -66,20 +55,11 @@ def rnn_generalized_delta_rule( if ops.shape(state)[0] == 1: state = ops.broadcast_to(state, (B, H, N, N)) else: - state = ops.zeros((B, H, N, N), dtype="float32") - out = ops.zeros((B, T, H, N), dtype=r.dtype) + state = ops.zeros((B, H, N, N)) + state = ops.cast(state, "float32") + out = ops.zeros((B, T, H, N), DTYPE) def step(t, inputs): - """ - Performs computation for a single time step. - - Parameters: - t: Current time step. - inputs: List containing current state and output. - - Returns: - Updated state and output. 
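The recurrence that `step` computes, written out for a single head in plain NumPy (a reference sketch, not the patch's code):

```python
import numpy as np

N = 4  # head size
rng = np.random.default_rng(0)
S = np.zeros((N, N))  # recurrent state
r, w, k, v, a, b = (rng.standard_normal(N) for _ in range(6))

# One time step: decay the state columns by w, apply the rank-1 "delta"
# correction through a and b, then write the new value/key outer product.
S = S * w[None, :] + S @ a[:, None] @ b[None, :] + v[:, None] @ k[None, :]
o = S @ r[:, None]  # per-step output read out by the receptance r
```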
- """ state, out = inputs kk = ops.reshape(k[:, t, :], (B, H, 1, N)) rr = ops.reshape(r[:, t, :], (B, H, N, 1)) @@ -87,9 +67,8 @@ def step(t, inputs): aa = ops.reshape(a[:, t, :], (B, H, N, 1)) bb = ops.reshape(b[:, t, :], (B, H, 1, N)) state = state * w[:, t, :, None, :] + state @ aa @ bb + vv @ kk - out = ops.slice_update( - out, [0, t, 0, 0], ops.reshape((state @ rr), (B, 1, H, N)) - ) + o = ops.cast((state @ rr), out.dtype) + out = ops.slice_update(out, [0, t, 0, 0], ops.reshape(o, (B, 1, H, N))) return [state, out] state, out = ops.fori_loop(0, T, step, [state, out]) @@ -104,12 +83,11 @@ def __init__(self, name="time_shift"): super(TimeShift, self).__init__(name=name) def call(self, inputs, cache_x=None): - x = ops.pad(inputs, [[0, 0], [1, 0], [0, 0]], constant_values=0.0)[ - :, :-1, : - ] if cache_x is not None: - x = ops.slice_update(x, [0, 0, 0], cache_x) - return x + x = ops.concatenate([cache_x, inputs], axis=1) + else: + x = ops.pad(inputs, [[0, 0], [1, 0], [0, 0]], constant_values=0.0) + return x[:, :-1, :] def compute_output_shape(self, input_shape): return input_shape @@ -121,18 +99,16 @@ def __init__(self, dim_ffn, kernel_initializer="glorot_uniform", **kwargs): self.dim_ffn = dim_ffn self.kernel_initializer = initializers.get(kernel_initializer) - def call(self, x, last_cache_x=None): - if last_cache_x is None: - xx = self.time_shift(x) - x - else: - xx = self.time_shift(x, last_cache_x) - x - last_cache_x = x[:, -1:, :] + def call(self, x, last_cache_x=None, train_mode=True): + xx = self.time_shift(x, last_cache_x) - x + if last_cache_x is not None or not train_mode: + last_cache_x = x[:, -1:] k = x + xx * self.x_k k = ops.relu(self.key(k)) ** 2 output = self.value(k) - if last_cache_x is not None: - output = [output, last_cache_x] - return output + if train_mode: + return output + return output, last_cache_x def compute_output_shape(self, input_shape): if isinstance(input_shape, list): @@ -167,9 +143,7 @@ def build(self, input_shape): def get_config(self): config = { "dim_ffn": self.dim_ffn, - "kernel_initializer": initializers.serialize( - self.kernel_initializer - ), + "kernel_initializer": initializers.serialize(self.kernel_initializer), } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) @@ -209,22 +183,19 @@ def __init__( self.kernel_initializer = initializers.get(kernel_initializer) self.initial_state = None try: - from rwkv_ops import RWKV7_USE_KERNEL from rwkv_ops import generalized_delta_rule + + self.RWKV7_OP = generalized_delta_rule except ImportError: warnings.warn( "The 'rwkv_ops' package is not installed. " - "Falling back to a pure-Python operator,that will very slow." + "Falling back to the default (pure-Python) operators, which will be very slow. 
" "Please install 'rwkv_ops' to enable the optimized kernels.", UserWarning, stacklevel=2, ) - generalized_delta_rule = rnn_generalized_delta_rule - RWKV7_USE_KERNEL = False - self.RWKV7_OP, self.USE_KERNEL = ( - generalized_delta_rule, - RWKV7_USE_KERNEL, - ) + self.RWKV7_OP = rnn_generalized_delta_rule + assert self.hidden_size % self.n_head == 0 def build(self, input_shape): @@ -358,8 +329,9 @@ def call( last_cache_x=None, cache_state=None, rnn_mode=False, + train_mode=True, ): - if cache_state is None: + if cache_state == None: initial_state = self.initial_state else: initial_state = cache_state @@ -370,11 +342,11 @@ def call( x *= padding_mask B, T, C = ops.shape(x) H = self.n_head - if last_cache_x is None: - xx = self.time_shift(x) - x - else: - xx = self.time_shift(x, last_cache_x) - x - last_cache_x = x[:, -1:, :] + xx = self.time_shift(x, last_cache_x) - x + if last_cache_x is not None or not train_mode: + last_cache_x = x[:, -1:] + if padding_mask is not None: + xx *= padding_mask xr = x + xx * self.x_r xw = x + xx * self.x_w @@ -386,10 +358,7 @@ def call( r = self.receptance(xr) w = ( -ops.softplus( - -( - self.w0 - + ops.matmul(ops.tanh(ops.matmul(xw, self.w1)), self.w2) - ) + -(self.w0 + ops.matmul(ops.tanh(ops.matmul(xw, self.w1)), self.w2)) ) - 0.5 ) # soft-clamp to (-inf, -0.5) @@ -414,27 +383,30 @@ def call( k = k * (1 + (a - 1) * self.k_a) if padding_mask is not None: - v *= padding_mask - if self.USE_KERNEL: - w += (1 - padding_mask) * -1e9 - else: - w = w * padding_mask + 1 - padding_mask - # N = self.head_size + w = ops.where(padding_mask, w, -1e9) if rnn_mode: rwkv7_op = rnn_generalized_delta_rule else: rwkv7_op = self.RWKV7_OP - x, finnal_state = rwkv7_op( - ops.reshape(r, (B, T, self.n_head, self.head_size)), - ops.reshape(w, (B, T, self.n_head, self.head_size)), - ops.reshape(k, (B, T, self.n_head, self.head_size)), - ops.reshape(v, (B, T, self.n_head, self.head_size)), - ops.reshape(-kk, (B, T, self.n_head, self.head_size)), - ops.reshape(kk * a, (B, T, self.n_head, self.head_size)), - initial_state=initial_state, - ) - x = ops.reshape(x, (B, T, C)) + def reshape_and_cast(x, new_shape, dtype="float32"): + x = ops.reshape(x, new_shape) + if rnn_mode: + return x + return ops.cast(x, dtype) + + x, finnal_state = rwkv7_op( + reshape_and_cast(r, (B, T, self.n_head, self.head_size)), + reshape_and_cast(w, (B, T, self.n_head, self.head_size)), + reshape_and_cast(k, (B, T, self.n_head, self.head_size)), + reshape_and_cast(v, (B, T, self.n_head, self.head_size)), + reshape_and_cast(-kk, (B, T, self.n_head, self.head_size)), + reshape_and_cast(kk * a, (B, T, self.n_head, self.head_size)), + initial_state=ops.cast(initial_state, "float32") + if initial_state is not None + else None, + ) + x = reshape_and_cast(x, (B, T, C), self.compute_dtype) x = ops.reshape(self.ln_x(ops.reshape(x, (B * T, C))), ops.shape(x)) @@ -449,10 +421,9 @@ def call( x = x + ops.reshape(rwkv, (B, T, C)) x = self.output_layer(x * g) - output = [x, v_first] - if last_cache_x is not None: - output.extend([last_cache_x, finnal_state]) - return output + if train_mode: + return x, v_first + return x, v_first, last_cache_x, finnal_state def compute_output_shape(self, input_shape): output_shapes = [ @@ -484,9 +455,7 @@ def get_config(self): "mv_lora": self.mv_lora, "aaa_lora": self.aaa_lora, "decay_lora": self.decay_lora, - "kernel_initializer": initializers.serialize( - self.kernel_initializer - ), + "kernel_initializer": initializers.serialize(self.kernel_initializer), } base_config = super().get_config() 
return dict(list(base_config.items()) + list(config.items())) @@ -520,19 +489,13 @@ def __init__( def build(self, input_shape): super().build(input_shape) if self.use_initial_norm: - self.ln0 = keras.layers.LayerNormalization( - epsilon=1e-5, name="init_norm" - ) + self.ln0 = keras.layers.LayerNormalization(epsilon=1e-5, name="init_norm") self.ln0.build(input_shape) - self.ln1 = keras.layers.LayerNormalization( - epsilon=1e-5, name="att_norm" - ) + self.ln1 = keras.layers.LayerNormalization(epsilon=1e-5, name="att_norm") self.ln1.build(input_shape) - self.ln2 = keras.layers.LayerNormalization( - epsilon=1e-5, name="ffn_norm" - ) + self.ln2 = keras.layers.LayerNormalization(epsilon=1e-5, name="ffn_norm") self.ln2.build(input_shape) self.att = RWKV7_TimeMix( @@ -563,27 +526,41 @@ def call( cache_tmix_x=None, cache_cmix_x=None, rnn_mode=False, + train_mode=True, ): + if padding_mask is not None: + padding_mask = ops.cast(padding_mask, x.dtype) + padding_mask = ops.expand_dims(padding_mask, axis=-1) if self.use_initial_norm: x = self.ln0(x) - if cache_state is None: + if train_mode: xx, v_first = self.att( - self.ln1(x), v_first=v_first, padding_mask=padding_mask + self.ln1(x), + v_first=v_first, + padding_mask=padding_mask, + train_mode=train_mode, ) x = x + xx - x = x + self.ffn(self.ln2(x)) + xx = self.ln2(x) + if padding_mask is not None: + xx = xx * padding_mask + x = x + self.ffn(xx, train_mode=train_mode) return x, v_first else: - xx, v_first, cache_tmix_x, cache_state = self.att( + xx, v_first, cache_tmix_x, cache_state = self.att.call( self.ln1(x), v_first=v_first, padding_mask=padding_mask, last_cache_x=cache_tmix_x, cache_state=cache_state, rnn_mode=rnn_mode, + train_mode=train_mode, ) x = x + xx - xx, cache_cmix_x = self.ffn(self.ln2(x), cache_cmix_x) + xx = self.ln2(x) + if padding_mask is not None: + xx = xx * padding_mask + xx, cache_cmix_x = self.ffn(xx, cache_cmix_x, train_mode=train_mode) x = x + xx return x, v_first, cache_state, cache_tmix_x, cache_cmix_x @@ -604,9 +581,7 @@ def get_config(self): "decay_lora": self.decay_lora, "intermediate_dim": self.intermediate_dim, "use_initial_norm": self.use_initial_norm, - "kernel_initializer": initializers.serialize( - self.kernel_initializer - ), + "kernel_initializer": initializers.serialize(self.kernel_initializer), } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py index ce2e49535a..5f5f990e4f 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py +++ b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py @@ -1,7 +1,6 @@ import os import keras - from keras_hub.src.api_export import keras_hub_export from keras_hub.src.tokenizers import tokenizer from keras_hub.src.utils.tensor_utils import is_int_dtype @@ -115,13 +114,7 @@ def printTokens(self, tokens): print(f"{repr(s)}{i}", end=" ") print() - -@keras_hub_export( - [ - "keras_hub.tokenizers.RWKVTokenizer", - "keras_hub.models.RWKVTokenizer", - ] -) +@keras_hub_export("keras_hub.tokenizers.RWKVTokenizer") class RWKVTokenizer(tokenizer.Tokenizer): def __init__( self, @@ -203,7 +196,7 @@ def tokenize(self, inputs): tokens = self._tokenizer.encode(inputs) def tokens2ids(x): - return [self.token_to_id(t) for t in x] + return [self.id_to_token(t) for t in x] if is_string_dtype(self.dtype): if isinstance(inputs, str): @@ -213,7 +206,11 @@ def tokens2ids(x): def detokenize(self, inputs): self._check_vocabulary() - return 
self._tokenizer.decode(inputs) + strip_zero_inputs = [] + for t in inputs: + strip_zero_inputs.append([x for x in t if x != 0]) + + return self._tokenizer.decode(strip_zero_inputs) def compute_output_spec(self, input_spec): return keras.KerasTensor( diff --git a/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py index 5c4b76467e..28cb1c0aff 100644 --- a/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py +++ b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py @@ -23,7 +23,7 @@ from modelscope import snapshot_download from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone -from keras_hub.src.models.rwkv7.rwkv7_casual_lm import RWKV7CausalLM +from keras_hub.src.models.rwkv7.rwkv7_causal_lm import RWKV7CausalLM # Local modules from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer @@ -32,10 +32,11 @@ # Model Preset Registry # ============================================================================== PRESET_MAP = { - "rwkv7_world_0.1B": "RWKV-x070-World-0.1B-v2.8-20241210-ctx4096.pth", - "rwkv7_world_0.3B": "RWKV-x070-World-0.4B-v2.9-20250107-ctx4096.pth", - "rwkv7_world_1.5B": "RWKV-x070-World-1.5B-v3-20250127-ctx4096.pth", - "rwkv7_world_2.9B": "RWKV-x070-World-2.9B-v3-20250211-ctx4096.pth", + "RWKV7_G1a_0.1B": "rwkv7-g1a-0.1b-20250728-ctx4096.pth", + "RWKV7_G1a_0.3B": "rwkv7-g1a-0.4b-20250905-ctx4096.pth", + "RWKV7_G1a_1.5B": "rwkv7-g1a-1.5b-20250922-ctx4096.pth", + "RWKV7_G1a_2.9B": "rwkv7-g1a-2.9b-20250924-ctx4096.pth", + "RWKV7_G0a_7.2B": "rwkv7-g0a-7.2b-20250829-ctx4096.pth", } # ============================================================================== @@ -431,6 +432,8 @@ def main(_): standard_model = RWKV(args) weights = torch.load(weights_path, map_location="cpu") + # Some parameters are not present in the weights, but this does not matter. + # This is because these parameters are not used standard_model.load_state_dict(weights, strict=False) # Sanity check: tokenize & compare outputs From afcff31bf865222f338ee40d6999d7d9e3da5f74 Mon Sep 17 00:00:00 2001 From: pass_lin <935499957@qq.com> Date: Tue, 7 Oct 2025 23:19:52 +0800 Subject: [PATCH 05/10] add inference --- keras_hub/src/models/rwkv7/rwkv7_layer.py | 3 +-- tools/checkpoint_conversion/convert_rwkv7_checkpoints.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/keras_hub/src/models/rwkv7/rwkv7_layer.py b/keras_hub/src/models/rwkv7/rwkv7_layer.py index 0df1c14322..65acf81195 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_layer.py +++ b/keras_hub/src/models/rwkv7/rwkv7_layer.py @@ -184,13 +184,12 @@ def __init__( self.initial_state = None try: from rwkv_ops import generalized_delta_rule - self.RWKV7_OP = generalized_delta_rule except ImportError: warnings.warn( "The 'rwkv_ops' package is not installed. " "Falling back to the default (pure-Python) operators, which will be very slow. 
" - "Please install 'rwkv_ops' to enable the optimized kernels.", + "Please 'pip install rwkv_ops' to enable the optimized kernels.", UserWarning, stacklevel=2, ) diff --git a/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py index 28cb1c0aff..4504b09253 100644 --- a/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py +++ b/tools/checkpoint_conversion/convert_rwkv7_checkpoints.py @@ -409,7 +409,7 @@ def main(_): # Download checkpoint download_path = snapshot_download( - repo_id="Blink_DL/rwkv-7-world", + repo_id="RWKV/rwkv7-g1", allow_patterns=souce_model_name, ) weights_path = os.path.join(download_path, souce_model_name) From ec0baf3b27eb4d52784b73d39429c7539af55345 Mon Sep 17 00:00:00 2001 From: pass_lin <935499957@qq.com> Date: Wed, 8 Oct 2025 00:10:06 +0800 Subject: [PATCH 06/10] add tokenizer doc --- keras_hub/src/models/rwkv7/rwkv7_layer.py | 15 +- keras_hub/src/models/rwkv7/rwkv7_tokenizer.py | 179 +++++++++++++++++- 2 files changed, 180 insertions(+), 14 deletions(-) diff --git a/keras_hub/src/models/rwkv7/rwkv7_layer.py b/keras_hub/src/models/rwkv7/rwkv7_layer.py index 65acf81195..1a46a59bd8 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_layer.py +++ b/keras_hub/src/models/rwkv7/rwkv7_layer.py @@ -24,19 +24,8 @@ def rnn_generalized_delta_rule( output_final_state: bool = True, head_first: bool = False, ): - """ - Implements the generalized delta rule. - - Parameters: - r: Input tensor. - w: Weight tensor. - k, v, a, b: Other input tensors. - initial_state: Initial state tensor. - output_final_state: Whether to return the final state. - head_first: Whether to place the head dimension first during computation. - - Returns: - Final state if output_final_state is True, otherwise only the output. + """Implements the generalized delta rule. + """ DTYPE = r.dtype B, T, H, N = ops.shape(r) diff --git a/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py index 5f5f990e4f..e2ca50e418 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py +++ b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py @@ -7,21 +7,34 @@ from keras_hub.src.utils.tensor_utils import is_string_dtype from keras_hub.src.utils.tensor_utils import tensor_to_list +# Vocabulary file name constant VOCAB_FILENAME = "vocab.txt" class TRIE: + """Byte-level Trie structure for longest prefix matching. + + This class implements a trie data structure that stores byte + sequences and allows efficient longest prefix matching. + """ __slots__ = tuple("ch,to,values,front".split(",")) to: list values: set def __init__(self, front=None, ch=None): + """Initialize a TRIE node. + + Args: + front: Parent node reference. + ch: Byte value for this node. + """ self.ch = ch self.to = [None for ch in range(256)] self.values = set() self.front = front def __repr__(self): + """String representation of the TRIE node.""" fr = self ret = [] while fr is not None: @@ -31,6 +44,16 @@ def __repr__(self): return "" % (ret[::-1], self.values) def add(self, key: bytes, idx: int = 0, val=None): + """Add a key-value pair to the trie. + + Args: + key: Byte sequence to add. + idx: Current index in key processing. + val: Value to store (defaults to key). + + Returns: + Final node where key was inserted. 
+ """ if idx == len(key): if val is None: val = key @@ -42,6 +65,15 @@ def add(self, key: bytes, idx: int = 0, val=None): return self.to[ch].add(key, idx=idx + 1, val=val) def find_longest(self, key: bytes, idx: int = 0): + """Find longest match in trie for given key. + + Args: + key: Byte sequence to search for. + idx: Starting index for search. + + Returns: + Tuple of (end_index, node, values) for match. + """ u: TRIE = self ch: int = key[idx] @@ -57,7 +89,18 @@ def find_longest(self, key: bytes, idx: int = 0): class RWKV_TOKENIZER: + """RWKV tokenizer implementation using byte-level trie. + + Implements tokenization using a fixed vocabulary and greedy + longest-match algorithm on byte sequences. + """ def __init__(self, vocabs): + """Initialize tokenizer with vocabulary. + + Args: + vocabs: List of vocabulary entries in format + " ". + """ self.idx2token = {} sorted = [] # must be already sorted for l in vocabs: @@ -78,6 +121,14 @@ def __init__(self, vocabs): _ = self.root.add(t, val=(t, i)) def encodeBytes(self, src: bytes): + """Encode byte sequence to token IDs. + + Args: + src: Byte sequence to encode. + + Returns: + List of token IDs. + """ idx: int = 0 tokens = [] while idx < len(src): @@ -89,15 +140,39 @@ def encodeBytes(self, src: bytes): return tokens def decodeBytes(self, tokens): + """Decode token IDs to byte sequence. + + Args: + tokens: List of token IDs. + + Returns: + Decoded byte sequence. + """ return b"".join(map(lambda i: self.idx2token[i], tokens)) def encode(self, src): + """Encode text to token IDs. + + Args: + src: Text string or list of strings. + + Returns: + Token IDs or list of token ID lists. + """ if isinstance(src, str): return self.encodeBytes(src.encode("utf-8")) else: return [self.encodeBytes(s.encode("utf-8")) for s in src] def decode(self, tokens): + """Decode token IDs to text. + + Args: + tokens: Token IDs or list of token ID lists. + + Returns: + List of decoded text strings. + """ return [self.decodeBytes(batch).decode("utf-8") for batch in tokens] # try: # return self.decodeBytes(tokens).decode('utf-8') @@ -105,6 +180,11 @@ def decode(self, tokens): # return '\ufffd' # bad utf-8 def printTokens(self, tokens): + """Print tokens with their string representations. + + Args: + tokens: List of token IDs to print. + """ for i in tokens: s = self.idx2token[i] try: @@ -114,14 +194,40 @@ def printTokens(self, tokens): print(f"{repr(s)}{i}", end=" ") print() + @keras_hub_export("keras_hub.tokenizers.RWKVTokenizer") class RWKVTokenizer(tokenizer.Tokenizer): + """RWKV byte-level tokenizer with longest-match trie search. + + This tokenizer maps raw text to a sequence of integer token ids + using a fixed vocabulary and a greedy longest-match algorithm. + + Args: + vocabulary: list of strings, each line formatted as + " ". + dtype: output dtype for tensor operations. Must be integer + or string type. + + Examples: + + >>> vocab = ["0 ' ' 1", "1 '\\n' 1", "2 'the' 3", "3 'hello' 5"] + >>> tok = RWKVTokenizer(vocabulary=vocab) + >>> tok("hello the") + [3, 0, 2] + """ def __init__( self, vocabulary=None, dtype="int32", **kwargs, ) -> None: + """Initialize RWKV tokenizer. + + Args: + vocabulary: Vocabulary list. + dtype: Output data type. + **kwargs: Additional keyword arguments. + """ if not is_int_dtype(dtype) and not is_string_dtype(dtype): raise ValueError( "Output dtype must be an integer type or a string. " @@ -136,6 +242,11 @@ def __init__( self.file_assets = [VOCAB_FILENAME] def set_vocabulary(self, vocabulary): + """Set the tokenizer vocabulary. 
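The greedy longest-match behavior documented above, reduced to a standalone toy (illustrative vocabulary, not the real vocab.txt):

```python
vocab = {b"h": 1, b"he": 2, b"hello": 3, b" ": 4, b"the": 5}

def encode(src: bytes):
    ids, i = [], 0
    while i < len(src):
        # Take the longest vocabulary entry matching at position i.
        match = max((t for t in vocab if src.startswith(t, i)), key=len)
        ids.append(vocab[match])
        i += len(match)
    return ids

print(encode(b"hello the"))  # [3, 4, 5]
```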
+ + Args: + vocabulary: Vocabulary list to set. + """ self.vocabulary = vocabulary self._tokenizer = RWKV_TOKENIZER(vocabulary) self.pad_token_id = 0 @@ -143,17 +254,28 @@ def set_vocabulary(self, vocabulary): self.end_token_id = self.tokenize(["\n\n"])[0][0] def save_assets(self, dir_path): + """Save vocabulary to directory. + + Args: + dir_path: Directory path to save to. + """ path = os.path.join(dir_path, VOCAB_FILENAME) with open(path, "wb") as file: file.write("\n".join(self.vocabulary)) def load_assets(self, dir_path=""): + """Load vocabulary from directory. + + Args: + dir_path: Directory path to load from. + """ path = os.path.join(dir_path, VOCAB_FILENAME) with open(path, "r", encoding="utf-8") as f: vocabulary = f.readlines() self.set_vocabulary(vocabulary) def _check_vocabulary(self): + """Check if vocabulary is set, raise error if not.""" if self.vocabulary is None: raise ValueError( "No vocabulary has been set for RWKVTokenizer. Make " @@ -161,14 +283,32 @@ def _check_vocabulary(self): ) def vocabulary_size(self): + """Get the size of the vocabulary. + + Returns: + Number of tokens in vocabulary. + """ self._check_vocabulary() return int(len(self.vocabulary)) def get_vocabulary(self): + """Get the current vocabulary. + + Returns: + Current vocabulary list. + """ self._check_vocabulary() return tensor_to_list(self.vocabulary) def id_to_token(self, id): + """Convert token ID to string representation. + + Args: + id: Token ID to convert. + + Returns: + String representation of token. + """ self._check_vocabulary() if id >= self.vocabulary_size() or id < 0: raise ValueError( @@ -183,6 +323,11 @@ def token_to_id(self, token): return int(self._tokenizer.token2idx[token]) def get_config(self): + """Get tokenizer configuration. + + Returns: + Configuration dictionary. + """ config = super().get_config() config.update( { @@ -192,6 +337,14 @@ def get_config(self): return config def tokenize(self, inputs): + """Tokenize input text. + + Args: + inputs: Text to tokenize. + + Returns: + Tokenized representation. + """ self._check_vocabulary() tokens = self._tokenizer.encode(inputs) @@ -205,6 +358,14 @@ def tokens2ids(x): return tokens def detokenize(self, inputs): + """Convert tokens back to text. + + Args: + inputs: Tokens to convert. + + Returns: + Detokenized text. + """ self._check_vocabulary() strip_zero_inputs = [] for t in inputs: @@ -213,9 +374,25 @@ def detokenize(self, inputs): return self._tokenizer.decode(strip_zero_inputs) def compute_output_spec(self, input_spec): + """Compute output specification. + + Args: + input_spec: Input specification. + + Returns: + Output tensor specification. + """ return keras.KerasTensor( input_spec.shape + (None,), dtype=self.compute_dtype ) def call(self, inputs): - return self.tokenize(inputs) + """Call the tokenizer on inputs. + + Args: + inputs: Input text. + + Returns: + Tokenized output. 
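Putting the asset-loading and detokenization pieces documented above together (hypothetical paths; `0` is the pad id that `detokenize` strips):

```python
tok = RWKVTokenizer()
tok.load_assets("path/to/preset")  # reads vocab.txt from the directory

ids = tok.tokenize(["Hello World"])
padded = [[0, 0] + ids[0]]         # left-padding with the pad id
text = tok.detokenize(padded)      # zeros are stripped before decoding
```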
+ """ + return self.tokenize(inputs) \ No newline at end of file From bd6c6187449d3bf5e284a4f776ed5a1cf464ec71 Mon Sep 17 00:00:00 2001 From: pass_lin <935499957@qq.com> Date: Wed, 8 Oct 2025 00:45:04 +0800 Subject: [PATCH 07/10] add doc --- keras_hub/src/models/rwkv7/rwkv7_backbone.py | 65 ++++++++ keras_hub/src/models/rwkv7/rwkv7_causal_lm.py | 78 +++++++++- .../rwkv7/rwkv7_causal_lm_preprocessor.py | 141 +++++++++++++++--- keras_hub/src/models/rwkv7/rwkv7_layer.py | 129 ++++++++++++++-- keras_hub/src/models/rwkv7/rwkv7_tokenizer.py | 74 ++++----- 5 files changed, 415 insertions(+), 72 deletions(-) diff --git a/keras_hub/src/models/rwkv7/rwkv7_backbone.py b/keras_hub/src/models/rwkv7/rwkv7_backbone.py index 3c3fcc85b8..de460d95e9 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_backbone.py +++ b/keras_hub/src/models/rwkv7/rwkv7_backbone.py @@ -12,6 +12,54 @@ def rwkv7_kernel_initializer(stddev=0.02): @keras_hub_export("keras_hub.models.RWKV7Backbone") class RWKV7Backbone(Backbone): + """The [RWKV-7](https://arxiv.org/abs/2503.14456) core architecture. + + This network implements a Modern RNN architecture based on linear + attention mechanisms with recurrent processing, as described in the + RWKV papers. It includes the embedding lookups and RWKV-7 blocks. + + The default constructor gives a fully customizable, randomly initialized + RWKV-7 model with any number of layers, heads, and embedding dimensions. + To load preset architectures and weights, use the `from_preset` + constructor. + + Args: + hidden_size: int. The size of the transformer encoding and pooling + layers. + head_size: int. The size of each attention head. + num_layers: int. The number of transformer layers. + vocabulary_size: int. The size of the token vocabulary. + intermediate_dim: int. The output dimension of the first Dense layer in + a two-layer feedforward network for each transformer. + gate_lora: int. LoRA dimension for gating. + mv_lora: int. LoRA dimension for value mixing. + aaa_lora: int. LoRA dimension for alpha parameters. + decay_lora: int. LoRA dimension for decay parameters. + dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use + for model computations and weights. Note that some computations, + such as softmax and layer normalization, will always be done at + float32 precision regardless of dtype. + dropout_rate: float. Dropout rate for the dropout layer. + + Examples: + + ```python + input_data = np.ones(shape=(1, 12), dtype="int32") + + + # Randomly initialized RWKV-7 decoder with custom config. + model = keras_hub.models.RWKV7Backbone( + vocabulary_size=10, + hidden_size=512, + num_layers=2, + head_size=64, + intermediate_dim=1024, + dtype="float32" + ) + model(input_data) + ``` + """ + def __init__( self, hidden_size, @@ -27,6 +75,22 @@ def __init__( dropout_rate=0, **kwargs, ): + """Initialize RWKV7 backbone. + + Args: + hidden_size: Hidden dimension size. + head_size: Attention head size. + num_layers: Number of RWKV blocks. + vocabulary_size: Size of vocabulary. + intermediate_dim: Intermediate dimension for FFN. + gate_lora: LoRA dimension for gating. + mv_lora: LoRA dimension for value mixing. + aaa_lora: LoRA dimension for alpha parameters. + decay_lora: LoRA dimension for decay parameters. + dtype: Data type for the layer. + dropout_rate: Dropout rate for regularization. + **kwargs: Additional arguments. 
+ """ # === Layers === self.token_embedding = keras.layers.Embedding( input_dim=vocabulary_size, @@ -90,6 +154,7 @@ def __init__( dtype=dtype, **kwargs, ) + # Initialize the graph to avoid potential errors in some cases self.call(ops.ones([1, 16], "int32")) self.num_layers = num_layers diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py b/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py index b19ce735c2..a57d1d1773 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py +++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py @@ -11,10 +11,60 @@ @keras_hub_export("keras_hub.models.RWKV7CausalLM") class RWKV7CausalLM(CausalLM): + """An end-to-end RWKV-7 model for causal language modeling. + + A causal language model (LM) predicts the next token based on previous + tokens. This task setup can be used to train the model unsupervised on + plain text input, or to autoregressively generate plain text similar to + the data used for training. This task can be used for pre-training or + fine-tuning a RWKV-7 model, simply by calling `fit()`. + + This model has a generate() method, which generates text based on a + prompt. The generation strategy used is controlled by an additional + sampler argument on `compile()`. You can recompile the model with + different `keras_hub.samplers` objects to control the generation. By + default, `"greedy"` sampling will be used. + + Args: + backbone: A `keras_hub.models.RWKV7Backbone` instance. + preprocessor: A `keras_hub.models.RWKV7CausalLMPreprocessor` or `None`. + If `None`, this model will not apply preprocessing, and inputs + should be preprocessed before calling the model. + + Examples: + ```python + # Initialize the tokenizer and load assets from a local path. + tokenizer = RWKVTokenizer() + tokenizer.load_assets(rwkv_path) + + # Create a preprocessor with a sequence length of 8. + preprocessor = RWKV7CausalLMPreprocessor(tokenizer, sequence_length=8) + + # Initialize the model with a backbone and preprocessor. + causal_lm = RWKV7CausalLM(backbone, preprocessor) + + prompts = ["Bubble sort\n```python", "Hello World\n```python\n"] + + causal_lm.compile(sampler="greedy") + + outputs = causal_lm.generate(prompts, max_length=128) + for out in outputs: + print(out) + print("-" * 100) + ``` + """ + backbone_cls = RWKV7Backbone preprocessor_cls = RWKV7CausalLMPreprocessor def __init__(self, backbone, preprocessor=None, **kwargs): + """Initialize the RWKV-7 causal language model. + + Args: + backbone: The backbone model. + preprocessor: The preprocessor for tokenization. + **kwargs: Additional keyword arguments. + """ # === Layers === self.backbone = backbone self.preprocessor = preprocessor @@ -33,6 +83,26 @@ def call_with_cache( padding_mask=None, rnn_mode=True, ): + """Forward pass of `RWKV7CausalLM` with cache. + + `call_with_cache` adds an additional forward pass for the model for + autoregressive inference. Unlike calling the model directly, this method + allows caching previous state Tensors in RWKV layers, and avoids + recomputing the outputs of seen tokens. + + Args: + token_ids: a dense int Tensor with shape `(batch_size, max_length)`. + cache: a dense float Tensor, the cache of state and token values. + compute_head: bool, whether to compute the output head. + padding_mask: a dense bool Tensor, the padding mask. + rnn_mode: bool, whether to use RNN mode. + + Returns: + A (logits, hidden_states, cache) tuple. 
Where `logits` is the + language model logits for the input token_ids, `hidden_states` is + the final hidden representation of the input tokens, and `cache` is + the decoding cache. + """ state_cachce, last_token_cache = cache x = self.backbone.token_embedding(token_ids) if padding_mask is None: @@ -89,7 +159,7 @@ def _build_cache(self, token_ids): cache = [state_cachce, last_token_cache] # Seed the cache. - # prefill阶段可以使用kernel,要快一点 + # Prefill stage can use kernel for better performance _, hidden_states, cache = self.call_with_cache( token_ids, cache, @@ -111,8 +181,8 @@ def generate_step( model inputs, a dictionary with keys `"token_ids"` and `"padding_mask"`. Args: - inputs: A dictionary with two keys `"token_ids"` and - `"padding_mask"` and batched tensor values. + inputs: A dictionary with keys `"token_ids"`, `"padding_mask"`, and + `"predict_token_ids"` with batched tensor values. stop_token_ids: Tuple of id's of the end token to stop on. If all sequences have produced a new stop token, generation will stop. @@ -179,4 +249,4 @@ def next(prompt, cache, index): return { "token_ids": token_ids, "padding_mask": padding_mask, - } + } \ No newline at end of file diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py index 064e143c9e..6187a07f35 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py +++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py @@ -10,6 +10,87 @@ @keras_hub_export("keras_hub.models.RWKV7CausalLMPreprocessor") class RWKV7CausalLMPreprocessor(CausalLMPreprocessor): + """RWKV-7 Causal LM preprocessor. + + This preprocessing layer is meant for use with + `keras_hub.models.RWKV7CausalLM`. By default, it will take in batches of + strings, and return outputs in a `(x, y, sample_weight)` format, where the + `y` label is the next token id in the `x` sequence. + + For use with generation, the layer also exposes two methods + `generate_preprocess()` and `generate_postprocess()`. When this preprocessor + is attached to a `keras_hub.models.RWKV7CausalLM` instance, these methods + will be called implicitly in generate(). They can also be called + standalone (e.g. to precompute preprocessing inputs for generation in a + separate process). + + Args: + tokenizer: A `keras_hub.models.RWKVTokenizer` instance. + sequence_length: The length of the packed inputs. + add_start_token: If `True`, the preprocessor will prepend the tokenizer + start token to each input sequence. Default is `False`. + + Call arguments: + x: A string, `tf.Tensor` or list of python strings. + y: Label data. Should always be `None` as the layer generates labels. + sample_weight: Label weights. Should always be `None` as the layer + generates label weights. + sequence_length: Pass to override the configured sequence_length of + the layer. + + + Examples: + ```python + # Initialize the tokenizer and load assets from a local path. + tokenizer = RWKVTokenizer() + tokenizer.load_assets(rwkv_path) + + # Create a preprocessor with a sequence length of 8. + preprocessor = RWKV7CausalLMPreprocessor(tokenizer, sequence_length=8) + + # Tokenize and pack a batch of sentences. + preprocessor(["Bubble sort\n```python", "Hello World\n```python\n"]) + + # Preprocess inputs for generation with a maximum generation length of 16. 
+ preprocessor.generate_preprocess( + ["Bubble sort\n```python", "Hello World\n```python\n"], 16 + ) + ``` + Outputs (torch Backend) : + tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 893, + 1760, 2011, 32082, 11, 6884], + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 33155, 37576, 11, 6884, 42114]], dtype=torch.int32), + tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 893, 1760, + 2011, 32082, 11, 6884, 42114], + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 33155, + 37576, 11, 6884, 42114, 11]], dtype=torch.int32), + tensor([[False, False, False, False, False, False, False, False, True, + True, True, True, True, True, True], + [False, False, False, False, False, False, False, False, False, + True, True, True, True, True, True]]) + + {'token_ids': tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 893, 1760, 2011, 32082, 11, 6884], + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 33155, 37576, 11, 6884, 42114]], dtype=torch.int32), + 'padding_mask': tensor([[ True, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False], + [True, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False, False, False, False, False, + False, False, False, False, False]]), + 'predict_token_ids': tensor([[42114, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0], + [ 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0]], dtype=torch.int32)} + """ backbone_cls = RWKV7Backbone tokenizer_cls = RWKVTokenizer @@ -19,6 +100,13 @@ def __init__( add_start_token=False, **kwargs, ): + """Initialize the preprocessor. + + Args: + tokenizer: The tokenizer to use. + add_start_token: Whether to add start token. + **kwargs: Additional arguments. + """ super().__init__( tokenizer=tokenizer, add_start_token=add_start_token, **kwargs ) @@ -30,8 +118,19 @@ def call( sample_weight=None, sequence_length=None, ): + """Preprocess the input for training. + + Args: + x: Input text data. + y: Target data (optional). + sample_weight: Sample weights (optional). + sequence_length: Desired sequence length. + + Returns: + Preprocessed data tuple (x, y, sample_weight). + """ sequence_length = sequence_length or self.sequence_length - # padding 长度到16的倍数,适应kernel的需求 + # Pad length to multiples of 16 to meet kernel requirements sequence_length = sequence_length + (16 - sequence_length % 16) x = self.tokenizer(x) @@ -46,15 +145,13 @@ def call( return keras.utils.pack_x_y_sample_weight(x, y, sample_weight) def build(self, input_shape): - # Defer packer creation to `build()` so that we can be sure tokenizer - # assets have loaded when restoring a saved model. self.packer = StartEndPacker( start_value=None, end_value=None, pad_value=self.tokenizer.pad_token_id, sequence_length=self.sequence_length, return_padding_mask=True, - padding_side="left", + padding_side="left", # RWKV uses left-padding exclusively ) self.built = True @@ -63,31 +160,31 @@ def generate_preprocess( x, sequence_length=None, ): - """Convert strings to integer token input for generation. - - Similar to calling the layer for training, this method takes in strings - or tensor strings, tokenizes and packs the input, and computes a padding - mask masking all inputs not filled in with a padded value. 
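A concrete walk-through (hypothetical ids) of the prefill/decode split that `generate_preprocess` sets up:

```python
# Prompt ids after tokenization; assume prefill and decode lengths of 16.
prompt = [5, 6, 7]

start_token = prompt[-1]            # 7: the first token the decoder sees
prefill = prompt[:-1]               # [5, 6], left-padded out to length 16
predict = [start_token] + [0] * 15  # decode buffer seeded at index 0

# "token_ids" carries the left-padded prefill, "predict_token_ids" the
# seeded decode buffer, and generation then starts at index 1.
```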
- - Unlike calling the layer for training, this method does not compute - labels and will never append a `tokenizer.end_token_id` to the end of - the sequence (as generation is expected to continue at the end of the - inputted prompt). + """Preprocess input for generation. + + Args: + x: Input text data. + sequence_length: Maximum generation length. + + Returns: + Dictionary with preprocessed inputs for generation. """ if not self.built: self.build(None) - # 这么做的目的是为了对齐keras的api - # 输入的sequence_length是生成的最大长度 - # 而本身sequence_length则对应于prefill的最大长度 + # Align with Keras API + # Input sequence_length is the maximum generation length + # While self.sequence_length corresponds to the prefill max length generate_length = sequence_length sequence_length = self.sequence_length - # padding 长度到16的倍数,适应kernel的需求 + # Pad length to multiples of 16 to meet kernel requirements sequence_length = sequence_length + (16 - sequence_length % 16) generate_length = generate_length + (16 - generate_length % 16) x = [t[-sequence_length:] for t in self.tokenizer(x)] y = ops.zeros((len(x), generate_length), "int32") + # Utilize RNN characteristics where prefill and decode are two sequences + # But the first token of decode should be the last token of prefill start_token = [[t[-1]] for t in x] x = [t[:-1] if len(t) > 1 else [0] for t in x] @@ -109,10 +206,16 @@ def generate_postprocess( x, ): """Convert integer token output to strings for generation. - + This method reverses `generate_preprocess()`, by first removing all padding and start/end tokens, and then converting the integer sequence back to a string. + + Args: + x: Dictionary containing token_ids and padding_mask. + + Returns: + Detokenized string output. """ if not self.built: self.build(None) diff --git a/keras_hub/src/models/rwkv7/rwkv7_layer.py b/keras_hub/src/models/rwkv7/rwkv7_layer.py index 1a46a59bd8..309767eb80 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_layer.py +++ b/keras_hub/src/models/rwkv7/rwkv7_layer.py @@ -1,8 +1,9 @@ +import warnings + import keras from keras import initializers from keras import ops from keras.layers import Layer -import warnings def transpose_head(x, head_first): @@ -24,9 +25,7 @@ def rnn_generalized_delta_rule( output_final_state: bool = True, head_first: bool = False, ): - """Implements the generalized delta rule. - - """ + """Implements the generalized delta rule for RWKV.""" DTYPE = r.dtype B, T, H, N = ops.shape(r) r = transpose_head(r, head_first) @@ -68,6 +67,10 @@ def step(t, inputs): class TimeShift(Layer): + """Time shift layer that shifts input sequence by one step. + It also be called short conv + """ + def __init__(self, name="time_shift"): super(TimeShift, self).__init__(name=name) @@ -83,12 +86,31 @@ def compute_output_shape(self, input_shape): class RWKV7_ChannelMix(Layer): + """RWKV-7 channel mixing layer.""" + def __init__(self, dim_ffn, kernel_initializer="glorot_uniform", **kwargs): + """Initialize RWKV7 channel mixer. + + Args: + dim_ffn: Feed-forward dimension. + kernel_initializer: Weight initializer. + **kwargs: Additional layer arguments. + """ super().__init__(**kwargs) self.dim_ffn = dim_ffn self.kernel_initializer = initializers.get(kernel_initializer) def call(self, x, last_cache_x=None, train_mode=True): + """Process input through channel mixer. + + Args: + x: Input tensor. + last_cache_x: Cached previous values. + train_mode: Whether in training mode. + + Returns: + Mixed output tensor. 
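What `TimeShift` computes, in plain NumPy (an illustrative sketch of the pad-and-drop form above):

```python
import numpy as np

x = np.arange(6.0).reshape(1, 3, 2)  # (batch, time, channels)
shifted = np.pad(x, ((0, 0), (1, 0), (0, 0)))[:, :-1, :]
# shifted[:, 0] is zeros; shifted[:, t] equals x[:, t - 1].
delta = shifted - x  # the "xx" that both mixers blend back in
```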
+ """ xx = self.time_shift(x, last_cache_x) - x if last_cache_x is not None or not train_mode: last_cache_x = x[:, -1:] @@ -132,13 +154,20 @@ def build(self, input_shape): def get_config(self): config = { "dim_ffn": self.dim_ffn, - "kernel_initializer": initializers.serialize(self.kernel_initializer), + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) class GroupNorm(keras.layers.GroupNormalization): + """Group normalization with backend-specific handling. + + Extends Keras GroupNormalization with PyTorch backend support. + """ + def call(self, inputs): if keras.config.backend() == "torch": import torch.nn.functional as F @@ -150,6 +179,8 @@ def call(self, inputs): class RWKV7_TimeMix(Layer): + """RWKV-7 time mixing layer.""" + def __init__( self, hidden_size, @@ -161,6 +192,18 @@ def __init__( kernel_initializer="glorot_uniform", **kwargs, ): + """Initialize RWKV7 time mixer. + + Args: + hidden_size: Hidden dimension size. + head_size: Attention head size. + gate_lora: LoRA dimension for gating. + mv_lora: LoRA dimension for value mixing. + aaa_lora: LoRA dimension for alpha parameters. + decay_lora: LoRA dimension for decay parameters. + kernel_initializer: Weight initializer. + **kwargs: Additional layer arguments. + """ super().__init__(**kwargs) self.head_size = head_size self.hidden_size = hidden_size @@ -173,12 +216,14 @@ def __init__( self.initial_state = None try: from rwkv_ops import generalized_delta_rule + self.RWKV7_OP = generalized_delta_rule except ImportError: warnings.warn( "The 'rwkv_ops' package is not installed. " - "Falling back to the default (pure-Python) operators, which will be very slow. " - "Please 'pip install rwkv_ops' to enable the optimized kernels.", + "Falling back to the default (pure-Python) operators" + "pure-Python which will be very slow. " + "Please 'pip install rwkv_ops' to enable the optimized kernels", UserWarning, stacklevel=2, ) @@ -319,7 +364,21 @@ def call( rnn_mode=False, train_mode=True, ): - if cache_state == None: + """Process input through time mixer. + + Args: + x: Input tensor. + v_first: First value for mixing. + padding_mask: Mask for padding tokens. + last_cache_x: Cached previous values. + cache_state: Cached recurrent state. + rnn_mode: Whether to use RNN mode. + train_mode: Whether in training mode. + + Returns: + Mixed output tensor and state information. + """ + if cache_state is None: initial_state = self.initial_state else: initial_state = cache_state @@ -346,7 +405,10 @@ def call( r = self.receptance(xr) w = ( -ops.softplus( - -(self.w0 + ops.matmul(ops.tanh(ops.matmul(xw, self.w1)), self.w2)) + -( + self.w0 + + ops.matmul(ops.tanh(ops.matmul(xw, self.w1)), self.w2) + ) ) - 0.5 ) # soft-clamp to (-inf, -0.5) @@ -443,7 +505,9 @@ def get_config(self): "mv_lora": self.mv_lora, "aaa_lora": self.aaa_lora, "decay_lora": self.decay_lora, - "kernel_initializer": initializers.serialize(self.kernel_initializer), + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) @@ -463,6 +527,20 @@ def __init__( kernel_initializer="glorot_uniform", **kwargs, ): + """Initialize RWKV7 block. + + Args: + hidden_size: Hidden dimension size. + head_size: Attention head size. + intermediate_dim: Intermediate dimension for FFN. + gate_lora: LoRA dimension for gating. + mv_lora: LoRA dimension for value mixing. 
+ aaa_lora: LoRA dimension for alpha parameters. + decay_lora: LoRA dimension for decay parameters. + use_initial_norm: Whether to use initial normalization. + kernel_initializer: Weight initializer. + **kwargs: Additional layer arguments. + """ super().__init__(**kwargs) self.head_size = head_size self.hidden_size = hidden_size @@ -477,13 +555,19 @@ def __init__( def build(self, input_shape): super().build(input_shape) if self.use_initial_norm: - self.ln0 = keras.layers.LayerNormalization(epsilon=1e-5, name="init_norm") + self.ln0 = keras.layers.LayerNormalization( + epsilon=1e-5, name="init_norm" + ) self.ln0.build(input_shape) - self.ln1 = keras.layers.LayerNormalization(epsilon=1e-5, name="att_norm") + self.ln1 = keras.layers.LayerNormalization( + epsilon=1e-5, name="att_norm" + ) self.ln1.build(input_shape) - self.ln2 = keras.layers.LayerNormalization(epsilon=1e-5, name="ffn_norm") + self.ln2 = keras.layers.LayerNormalization( + epsilon=1e-5, name="ffn_norm" + ) self.ln2.build(input_shape) self.att = RWKV7_TimeMix( @@ -516,6 +600,21 @@ def call( rnn_mode=False, train_mode=True, ): + """Process input through RWKV block. + + Args: + x: Input tensor. + v_first: First value for mixing. + padding_mask: Mask for padding tokens. + cache_state: Cached recurrent state. + cache_tmix_x: Cached time mixer values. + cache_cmix_x: Cached channel mixer values. + rnn_mode: Whether to use RNN mode. + train_mode: Whether in training mode. + + Returns: + Processed output tensor and cache information. + """ if padding_mask is not None: padding_mask = ops.cast(padding_mask, x.dtype) padding_mask = ops.expand_dims(padding_mask, axis=-1) @@ -569,7 +668,9 @@ def get_config(self): "decay_lora": self.decay_lora, "intermediate_dim": self.intermediate_dim, "use_initial_norm": self.use_initial_norm, - "kernel_initializer": initializers.serialize(self.kernel_initializer), + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py index e2ca50e418..bc2069a604 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py +++ b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py @@ -1,6 +1,7 @@ import os import keras + from keras_hub.src.api_export import keras_hub_export from keras_hub.src.tokenizers import tokenizer from keras_hub.src.utils.tensor_utils import is_int_dtype @@ -13,17 +14,18 @@ class TRIE: """Byte-level Trie structure for longest prefix matching. - + This class implements a trie data structure that stores byte sequences and allows efficient longest prefix matching. """ + __slots__ = tuple("ch,to,values,front".split(",")) to: list values: set def __init__(self, front=None, ch=None): """Initialize a TRIE node. - + Args: front: Parent node reference. ch: Byte value for this node. @@ -45,12 +47,12 @@ def __repr__(self): def add(self, key: bytes, idx: int = 0, val=None): """Add a key-value pair to the trie. - + Args: key: Byte sequence to add. idx: Current index in key processing. val: Value to store (defaults to key). - + Returns: Final node where key was inserted. """ @@ -66,11 +68,11 @@ def add(self, key: bytes, idx: int = 0, val=None): def find_longest(self, key: bytes, idx: int = 0): """Find longest match in trie for given key. - + Args: key: Byte sequence to search for. idx: Starting index for search. - + Returns: Tuple of (end_index, node, values) for match. 
""" @@ -90,13 +92,14 @@ def find_longest(self, key: bytes, idx: int = 0): class RWKV_TOKENIZER: """RWKV tokenizer implementation using byte-level trie. - + Implements tokenization using a fixed vocabulary and greedy longest-match algorithm on byte sequences. """ + def __init__(self, vocabs): """Initialize tokenizer with vocabulary. - + Args: vocabs: List of vocabulary entries in format " ". @@ -122,10 +125,10 @@ def __init__(self, vocabs): def encodeBytes(self, src: bytes): """Encode byte sequence to token IDs. - + Args: src: Byte sequence to encode. - + Returns: List of token IDs. """ @@ -141,10 +144,10 @@ def encodeBytes(self, src: bytes): def decodeBytes(self, tokens): """Decode token IDs to byte sequence. - + Args: tokens: List of token IDs. - + Returns: Decoded byte sequence. """ @@ -152,10 +155,10 @@ def decodeBytes(self, tokens): def encode(self, src): """Encode text to token IDs. - + Args: src: Text string or list of strings. - + Returns: Token IDs or list of token ID lists. """ @@ -166,10 +169,10 @@ def encode(self, src): def decode(self, tokens): """Decode token IDs to text. - + Args: tokens: Token IDs or list of token ID lists. - + Returns: List of decoded text strings. """ @@ -181,7 +184,7 @@ def decode(self, tokens): def printTokens(self, tokens): """Print tokens with their string representations. - + Args: tokens: List of token IDs to print. """ @@ -215,6 +218,7 @@ class RWKVTokenizer(tokenizer.Tokenizer): >>> tok("hello the") [3, 0, 2] """ + def __init__( self, vocabulary=None, @@ -222,7 +226,7 @@ def __init__( **kwargs, ) -> None: """Initialize RWKV tokenizer. - + Args: vocabulary: Vocabulary list. dtype: Output data type. @@ -243,7 +247,7 @@ def __init__( def set_vocabulary(self, vocabulary): """Set the tokenizer vocabulary. - + Args: vocabulary: Vocabulary list to set. """ @@ -255,7 +259,7 @@ def set_vocabulary(self, vocabulary): def save_assets(self, dir_path): """Save vocabulary to directory. - + Args: dir_path: Directory path to save to. """ @@ -265,7 +269,7 @@ def save_assets(self, dir_path): def load_assets(self, dir_path=""): """Load vocabulary from directory. - + Args: dir_path: Directory path to load from. """ @@ -284,7 +288,7 @@ def _check_vocabulary(self): def vocabulary_size(self): """Get the size of the vocabulary. - + Returns: Number of tokens in vocabulary. """ @@ -293,7 +297,7 @@ def vocabulary_size(self): def get_vocabulary(self): """Get the current vocabulary. - + Returns: Current vocabulary list. """ @@ -302,10 +306,10 @@ def get_vocabulary(self): def id_to_token(self, id): """Convert token ID to string representation. - + Args: id: Token ID to convert. - + Returns: String representation of token. """ @@ -324,7 +328,7 @@ def token_to_id(self, token): def get_config(self): """Get tokenizer configuration. - + Returns: Configuration dictionary. """ @@ -338,10 +342,10 @@ def get_config(self): def tokenize(self, inputs): """Tokenize input text. - + Args: inputs: Text to tokenize. - + Returns: Tokenized representation. """ @@ -359,10 +363,10 @@ def tokens2ids(x): def detokenize(self, inputs): """Convert tokens back to text. - + Args: inputs: Tokens to convert. - + Returns: Detokenized text. """ @@ -375,10 +379,10 @@ def detokenize(self, inputs): def compute_output_spec(self, input_spec): """Compute output specification. - + Args: input_spec: Input specification. - + Returns: Output tensor specification. """ @@ -388,11 +392,11 @@ def compute_output_spec(self, input_spec): def call(self, inputs): """Call the tokenizer on inputs. - + Args: inputs: Input text. 
From 4201a7f54277dad1c1af50d55e6d2bbd62cccaa9 Mon Sep 17 00:00:00 2001
From: pass_lin <935499957@qq.com>
Date: Wed, 8 Oct 2025 01:47:13 +0800
Subject: [PATCH 08/10] add test case

---
 keras_hub/api/__init__.py                     |  16 +-
 keras_hub/api/layers/__init__.py              | 144 ++--
 keras_hub/api/metrics/__init__.py             |  10 +-
 keras_hub/api/models/__init__.py              | 749 +++++++++++-------
 keras_hub/api/samplers/__init__.py            |  22 +-
 keras_hub/api/tokenizers/__init__.py          | 146 +++-
 keras_hub/api/utils/__init__.py               |  18 +-
 .../src/models/rwkv7/rwkv7_backbone_test.py   |  37 +
 keras_hub/src/models/rwkv7/rwkv7_causal_lm.py |  10 +-
 .../rwkv7/rwkv7_causal_lm_preprocessor.py     |  45 +-
 .../rwkv7_causal_lm_preprocessor_test.py      |  98 +++
 .../src/models/rwkv7/rwkv7_causal_lm_test.py  |  92 +++
 .../src/models/rwkv7/rwkv7_tokenizer_test.py  |  25 +
 13 files changed, 1004 insertions(+), 408 deletions(-)
 create mode 100644 keras_hub/src/models/rwkv7/rwkv7_backbone_test.py
 create mode 100644 keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor_test.py
 create mode 100644 keras_hub/src/models/rwkv7/rwkv7_causal_lm_test.py
 create mode 100644 keras_hub/src/models/rwkv7/rwkv7_tokenizer_test.py

diff --git a/keras_hub/api/__init__.py b/keras_hub/api/__init__.py
index 3796e4c7f4..2aa98bf3f9 100644
--- a/keras_hub/api/__init__.py
+++ b/keras_hub/api/__init__.py
@@ -4,12 +4,12 @@
 since your modifications would be overwritten.
 """

-from keras_hub import layers
-from keras_hub import metrics
-from keras_hub import models
-from keras_hub import samplers
-from keras_hub import tokenizers
-from keras_hub import utils
-from keras_hub.src.utils.preset_utils import upload_preset
+from keras_hub import layers as layers
+from keras_hub import metrics as metrics
+from keras_hub import models as models
+from keras_hub import samplers as samplers
+from keras_hub import tokenizers as tokenizers
+from keras_hub import utils as utils
+from keras_hub.src.utils.preset_utils import upload_preset as upload_preset
 from keras_hub.src.version import __version__ as __version__
-from keras_hub.src.version import version
+from keras_hub.src.version import version as version
diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py
index c4411ba889..4550cf8689 100644
--- a/keras_hub/api/layers/__init__.py
+++ b/keras_hub/api/layers/__init__.py
@@ -4,105 +4,149 @@
 since your modifications would be overwritten.
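A note on the import style this patch restores across the generated api files, visible in the api/__init__.py hunk above and throughout the hunks that follow: under PEP 484 re-export semantics (enforced by type checkers such as mypy when implicit re-exports are disabled), a bare `from m import X` inside an `__init__.py` is treated as module-private, while the redundant-looking alias form marks the name as an intentionally public re-export. A minimal illustration, with hypothetical `mypkg` names:

# mypkg/__init__.py
from mypkg.impl import Thing            # implicit: checkers may treat
                                        # mypkg.Thing as not re-exported
from mypkg.impl import Thing as Thing   # explicit public re-export

That is presumably why this patch reverts the de-aliasing from patch 01: the generated api files deliberately spell every export as `X as X`.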
""" -from keras_hub.src.layers.modeling.alibi_bias import AlibiBias -from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator -from keras_hub.src.layers.modeling.box_matcher import BoxMatcher +from keras_hub.src.layers.modeling.alibi_bias import AlibiBias as AlibiBias +from keras_hub.src.layers.modeling.anchor_generator import ( + AnchorGenerator as AnchorGenerator, +) +from keras_hub.src.layers.modeling.box_matcher import BoxMatcher as BoxMatcher from keras_hub.src.layers.modeling.cached_multi_head_attention import ( - CachedMultiHeadAttention, + CachedMultiHeadAttention as CachedMultiHeadAttention, +) +from keras_hub.src.layers.modeling.f_net_encoder import ( + FNetEncoder as FNetEncoder, +) +from keras_hub.src.layers.modeling.masked_lm_head import ( + MaskedLMHead as MaskedLMHead, +) +from keras_hub.src.layers.modeling.non_max_supression import ( + NonMaxSuppression as NonMaxSuppression, +) +from keras_hub.src.layers.modeling.position_embedding import ( + PositionEmbedding as PositionEmbedding, ) -from keras_hub.src.layers.modeling.f_net_encoder import FNetEncoder -from keras_hub.src.layers.modeling.masked_lm_head import MaskedLMHead -from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression -from keras_hub.src.layers.modeling.position_embedding import PositionEmbedding from keras_hub.src.layers.modeling.reversible_embedding import ( - ReversibleEmbedding, + ReversibleEmbedding as ReversibleEmbedding, +) +from keras_hub.src.layers.modeling.rms_normalization import ( + RMSNormalization as RMSNormalization, +) +from keras_hub.src.layers.modeling.rotary_embedding import ( + RotaryEmbedding as RotaryEmbedding, ) -from keras_hub.src.layers.modeling.rms_normalization import RMSNormalization -from keras_hub.src.layers.modeling.rotary_embedding import RotaryEmbedding from keras_hub.src.layers.modeling.sine_position_encoding import ( - SinePositionEncoding, + SinePositionEncoding as SinePositionEncoding, ) from keras_hub.src.layers.modeling.token_and_position_embedding import ( - TokenAndPositionEmbedding, + TokenAndPositionEmbedding as TokenAndPositionEmbedding, +) +from keras_hub.src.layers.modeling.transformer_decoder import ( + TransformerDecoder as TransformerDecoder, +) +from keras_hub.src.layers.modeling.transformer_encoder import ( + TransformerEncoder as TransformerEncoder, +) +from keras_hub.src.layers.preprocessing.audio_converter import ( + AudioConverter as AudioConverter, +) +from keras_hub.src.layers.preprocessing.image_converter import ( + ImageConverter as ImageConverter, ) -from keras_hub.src.layers.modeling.transformer_decoder import TransformerDecoder -from keras_hub.src.layers.modeling.transformer_encoder import TransformerEncoder -from keras_hub.src.layers.preprocessing.audio_converter import AudioConverter -from keras_hub.src.layers.preprocessing.image_converter import ImageConverter from keras_hub.src.layers.preprocessing.masked_lm_mask_generator import ( - MaskedLMMaskGenerator, + MaskedLMMaskGenerator as MaskedLMMaskGenerator, ) from keras_hub.src.layers.preprocessing.multi_segment_packer import ( - MultiSegmentPacker, + MultiSegmentPacker as MultiSegmentPacker, +) +from keras_hub.src.layers.preprocessing.random_deletion import ( + RandomDeletion as RandomDeletion, +) +from keras_hub.src.layers.preprocessing.random_swap import ( + RandomSwap as RandomSwap, +) +from keras_hub.src.layers.preprocessing.start_end_packer import ( + StartEndPacker as StartEndPacker, ) -from keras_hub.src.layers.preprocessing.random_deletion import 
RandomDeletion -from keras_hub.src.layers.preprocessing.random_swap import RandomSwap -from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker from keras_hub.src.models.basnet.basnet_image_converter import ( - BASNetImageConverter, + BASNetImageConverter as BASNetImageConverter, +) +from keras_hub.src.models.clip.clip_image_converter import ( + CLIPImageConverter as CLIPImageConverter, ) -from keras_hub.src.models.clip.clip_image_converter import CLIPImageConverter from keras_hub.src.models.cspnet.cspnet_image_converter import ( - CSPNetImageConverter, + CSPNetImageConverter as CSPNetImageConverter, ) from keras_hub.src.models.d_fine.d_fine_image_converter import ( - DFineImageConverter, + DFineImageConverter as DFineImageConverter, ) from keras_hub.src.models.deeplab_v3.deeplab_v3_image_converter import ( - DeepLabV3ImageConverter, + DeepLabV3ImageConverter as DeepLabV3ImageConverter, +) +from keras_hub.src.models.deit.deit_image_converter import ( + DeiTImageConverter as DeiTImageConverter, ) -from keras_hub.src.models.deit.deit_image_converter import DeiTImageConverter from keras_hub.src.models.densenet.densenet_image_converter import ( - DenseNetImageConverter, + DenseNetImageConverter as DenseNetImageConverter, ) from keras_hub.src.models.depth_anything.depth_anything_image_converter import ( - DepthAnythingImageConverter, + DepthAnythingImageConverter as DepthAnythingImageConverter, ) from keras_hub.src.models.dinov2.dinov2_image_converter import ( - DINOV2ImageConverter, + DINOV2ImageConverter as DINOV2ImageConverter, ) from keras_hub.src.models.efficientnet.efficientnet_image_converter import ( - EfficientNetImageConverter, + EfficientNetImageConverter as EfficientNetImageConverter, ) from keras_hub.src.models.gemma3.gemma3_image_converter import ( - Gemma3ImageConverter, + Gemma3ImageConverter as Gemma3ImageConverter, ) from keras_hub.src.models.hgnetv2.hgnetv2_image_converter import ( - HGNetV2ImageConverter, + HGNetV2ImageConverter as HGNetV2ImageConverter, +) +from keras_hub.src.models.mit.mit_image_converter import ( + MiTImageConverter as MiTImageConverter, ) -from keras_hub.src.models.mit.mit_image_converter import MiTImageConverter from keras_hub.src.models.mobilenet.mobilenet_image_converter import ( - MobileNetImageConverter, + MobileNetImageConverter as MobileNetImageConverter, ) from keras_hub.src.models.moonshine.moonshine_audio_converter import ( - MoonshineAudioConverter, + MoonshineAudioConverter as MoonshineAudioConverter, ) from keras_hub.src.models.pali_gemma.pali_gemma_image_converter import ( - PaliGemmaImageConverter, + PaliGemmaImageConverter as PaliGemmaImageConverter, ) from keras_hub.src.models.parseq.parseq_image_converter import ( - PARSeqImageConverter, + PARSeqImageConverter as PARSeqImageConverter, ) from keras_hub.src.models.resnet.resnet_image_converter import ( - ResNetImageConverter, + ResNetImageConverter as ResNetImageConverter, ) from keras_hub.src.models.retinanet.retinanet_image_converter import ( - RetinaNetImageConverter, + RetinaNetImageConverter as RetinaNetImageConverter, +) +from keras_hub.src.models.sam.sam_image_converter import ( + SAMImageConverter as SAMImageConverter, +) +from keras_hub.src.models.sam.sam_mask_decoder import ( + SAMMaskDecoder as SAMMaskDecoder, +) +from keras_hub.src.models.sam.sam_prompt_encoder import ( + SAMPromptEncoder as SAMPromptEncoder, ) -from keras_hub.src.models.sam.sam_image_converter import SAMImageConverter -from keras_hub.src.models.sam.sam_mask_decoder import SAMMaskDecoder 
-from keras_hub.src.models.sam.sam_prompt_encoder import SAMPromptEncoder from keras_hub.src.models.segformer.segformer_image_converter import ( - SegFormerImageConverter, + SegFormerImageConverter as SegFormerImageConverter, ) from keras_hub.src.models.siglip.siglip_image_converter import ( - SigLIPImageConverter, + SigLIPImageConverter as SigLIPImageConverter, +) +from keras_hub.src.models.vgg.vgg_image_converter import ( + VGGImageConverter as VGGImageConverter, +) +from keras_hub.src.models.vit.vit_image_converter import ( + ViTImageConverter as ViTImageConverter, ) -from keras_hub.src.models.vgg.vgg_image_converter import VGGImageConverter -from keras_hub.src.models.vit.vit_image_converter import ViTImageConverter from keras_hub.src.models.whisper.whisper_audio_converter import ( - WhisperAudioConverter, + WhisperAudioConverter as WhisperAudioConverter, ) from keras_hub.src.models.xception.xception_image_converter import ( - XceptionImageConverter, + XceptionImageConverter as XceptionImageConverter, ) diff --git a/keras_hub/api/metrics/__init__.py b/keras_hub/api/metrics/__init__.py index 88a0a7df2b..100c2c66fb 100644 --- a/keras_hub/api/metrics/__init__.py +++ b/keras_hub/api/metrics/__init__.py @@ -4,8 +4,8 @@ since your modifications would be overwritten. """ -from keras_hub.src.metrics.bleu import Bleu -from keras_hub.src.metrics.edit_distance import EditDistance -from keras_hub.src.metrics.perplexity import Perplexity -from keras_hub.src.metrics.rouge_l import RougeL -from keras_hub.src.metrics.rouge_n import RougeN +from keras_hub.src.metrics.bleu import Bleu as Bleu +from keras_hub.src.metrics.edit_distance import EditDistance as EditDistance +from keras_hub.src.metrics.perplexity import Perplexity as Perplexity +from keras_hub.src.metrics.rouge_l import RougeL as RougeL +from keras_hub.src.metrics.rouge_n import RougeN as RougeN diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 714b0e64ad..03fe5c0418 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -4,535 +4,752 @@ since your modifications would be overwritten. 
""" -from keras_hub.src.models.albert.albert_backbone import AlbertBackbone -from keras_hub.src.models.albert.albert_masked_lm import AlbertMaskedLM +from keras_hub.src.models.albert.albert_backbone import ( + AlbertBackbone as AlbertBackbone, +) +from keras_hub.src.models.albert.albert_masked_lm import ( + AlbertMaskedLM as AlbertMaskedLM, +) from keras_hub.src.models.albert.albert_masked_lm_preprocessor import ( - AlbertMaskedLMPreprocessor, + AlbertMaskedLMPreprocessor as AlbertMaskedLMPreprocessor, ) from keras_hub.src.models.albert.albert_text_classifier import ( - AlbertTextClassifier, + AlbertTextClassifier as AlbertClassifier, ) from keras_hub.src.models.albert.albert_text_classifier import ( - AlbertTextClassifier as AlbertClassifier, + AlbertTextClassifier as AlbertTextClassifier, ) from keras_hub.src.models.albert.albert_text_classifier_preprocessor import ( - AlbertTextClassifierPreprocessor, + AlbertTextClassifierPreprocessor as AlbertPreprocessor, ) from keras_hub.src.models.albert.albert_text_classifier_preprocessor import ( - AlbertTextClassifierPreprocessor as AlbertPreprocessor, + AlbertTextClassifierPreprocessor as AlbertTextClassifierPreprocessor, +) +from keras_hub.src.models.albert.albert_tokenizer import ( + AlbertTokenizer as AlbertTokenizer, +) +from keras_hub.src.models.backbone import Backbone as Backbone +from keras_hub.src.models.bart.bart_backbone import BartBackbone as BartBackbone +from keras_hub.src.models.bart.bart_seq_2_seq_lm import ( + BartSeq2SeqLM as BartSeq2SeqLM, ) -from keras_hub.src.models.albert.albert_tokenizer import AlbertTokenizer -from keras_hub.src.models.backbone import Backbone -from keras_hub.src.models.bart.bart_backbone import BartBackbone -from keras_hub.src.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM from keras_hub.src.models.bart.bart_seq_2_seq_lm_preprocessor import ( - BartSeq2SeqLMPreprocessor, -) -from keras_hub.src.models.bart.bart_tokenizer import BartTokenizer -from keras_hub.src.models.basnet.basnet import BASNetImageSegmenter -from keras_hub.src.models.basnet.basnet_backbone import BASNetBackbone -from keras_hub.src.models.basnet.basnet_preprocessor import BASNetPreprocessor -from keras_hub.src.models.bert.bert_backbone import BertBackbone -from keras_hub.src.models.bert.bert_masked_lm import BertMaskedLM + BartSeq2SeqLMPreprocessor as BartSeq2SeqLMPreprocessor, +) +from keras_hub.src.models.bart.bart_tokenizer import ( + BartTokenizer as BartTokenizer, +) +from keras_hub.src.models.basnet.basnet import ( + BASNetImageSegmenter as BASNetImageSegmenter, +) +from keras_hub.src.models.basnet.basnet_backbone import ( + BASNetBackbone as BASNetBackbone, +) +from keras_hub.src.models.basnet.basnet_preprocessor import ( + BASNetPreprocessor as BASNetPreprocessor, +) +from keras_hub.src.models.bert.bert_backbone import BertBackbone as BertBackbone +from keras_hub.src.models.bert.bert_masked_lm import ( + BertMaskedLM as BertMaskedLM, +) from keras_hub.src.models.bert.bert_masked_lm_preprocessor import ( - BertMaskedLMPreprocessor, + BertMaskedLMPreprocessor as BertMaskedLMPreprocessor, ) -from keras_hub.src.models.bert.bert_text_classifier import BertTextClassifier from keras_hub.src.models.bert.bert_text_classifier import ( BertTextClassifier as BertClassifier, ) -from keras_hub.src.models.bert.bert_text_classifier_preprocessor import ( - BertTextClassifierPreprocessor, +from keras_hub.src.models.bert.bert_text_classifier import ( + BertTextClassifier as BertTextClassifier, ) from 
keras_hub.src.models.bert.bert_text_classifier_preprocessor import ( BertTextClassifierPreprocessor as BertPreprocessor, ) -from keras_hub.src.models.bert.bert_tokenizer import BertTokenizer -from keras_hub.src.models.bloom.bloom_backbone import BloomBackbone -from keras_hub.src.models.bloom.bloom_causal_lm import BloomCausalLM +from keras_hub.src.models.bert.bert_text_classifier_preprocessor import ( + BertTextClassifierPreprocessor as BertTextClassifierPreprocessor, +) +from keras_hub.src.models.bert.bert_tokenizer import ( + BertTokenizer as BertTokenizer, +) +from keras_hub.src.models.bloom.bloom_backbone import ( + BloomBackbone as BloomBackbone, +) +from keras_hub.src.models.bloom.bloom_causal_lm import ( + BloomCausalLM as BloomCausalLM, +) from keras_hub.src.models.bloom.bloom_causal_lm_preprocessor import ( - BloomCausalLMPreprocessor, -) -from keras_hub.src.models.bloom.bloom_tokenizer import BloomTokenizer -from keras_hub.src.models.causal_lm import CausalLM -from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor -from keras_hub.src.models.clip.clip_backbone import CLIPBackbone -from keras_hub.src.models.clip.clip_preprocessor import CLIPPreprocessor -from keras_hub.src.models.clip.clip_text_encoder import CLIPTextEncoder -from keras_hub.src.models.clip.clip_tokenizer import CLIPTokenizer -from keras_hub.src.models.clip.clip_vision_encoder import CLIPVisionEncoder -from keras_hub.src.models.cspnet.cspnet_backbone import CSPNetBackbone + BloomCausalLMPreprocessor as BloomCausalLMPreprocessor, +) +from keras_hub.src.models.bloom.bloom_tokenizer import ( + BloomTokenizer as BloomTokenizer, +) +from keras_hub.src.models.causal_lm import CausalLM as CausalLM +from keras_hub.src.models.causal_lm_preprocessor import ( + CausalLMPreprocessor as CausalLMPreprocessor, +) +from keras_hub.src.models.clip.clip_backbone import CLIPBackbone as CLIPBackbone +from keras_hub.src.models.clip.clip_preprocessor import ( + CLIPPreprocessor as CLIPPreprocessor, +) +from keras_hub.src.models.clip.clip_text_encoder import ( + CLIPTextEncoder as CLIPTextEncoder, +) +from keras_hub.src.models.clip.clip_tokenizer import ( + CLIPTokenizer as CLIPTokenizer, +) +from keras_hub.src.models.clip.clip_vision_encoder import ( + CLIPVisionEncoder as CLIPVisionEncoder, +) +from keras_hub.src.models.cspnet.cspnet_backbone import ( + CSPNetBackbone as CSPNetBackbone, +) from keras_hub.src.models.cspnet.cspnet_image_classifier import ( - CSPNetImageClassifier, + CSPNetImageClassifier as CSPNetImageClassifier, ) from keras_hub.src.models.cspnet.cspnet_image_classifier_preprocessor import ( - CSPNetImageClassifierPreprocessor, + CSPNetImageClassifierPreprocessor as CSPNetImageClassifierPreprocessor, +) +from keras_hub.src.models.d_fine.d_fine_backbone import ( + DFineBackbone as DFineBackbone, ) -from keras_hub.src.models.d_fine.d_fine_backbone import DFineBackbone from keras_hub.src.models.d_fine.d_fine_object_detector import ( - DFineObjectDetector, + DFineObjectDetector as DFineObjectDetector, ) from keras_hub.src.models.d_fine.d_fine_object_detector_preprocessor import ( - DFineObjectDetectorPreprocessor, + DFineObjectDetectorPreprocessor as DFineObjectDetectorPreprocessor, ) from keras_hub.src.models.deberta_v3.deberta_v3_backbone import ( - DebertaV3Backbone, + DebertaV3Backbone as DebertaV3Backbone, ) from keras_hub.src.models.deberta_v3.deberta_v3_masked_lm import ( - DebertaV3MaskedLM, + DebertaV3MaskedLM as DebertaV3MaskedLM, ) from 
keras_hub.src.models.deberta_v3.deberta_v3_masked_lm_preprocessor import ( - DebertaV3MaskedLMPreprocessor, + DebertaV3MaskedLMPreprocessor as DebertaV3MaskedLMPreprocessor, ) from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier import ( - DebertaV3TextClassifier, + DebertaV3TextClassifier as DebertaV3Classifier, ) from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier import ( - DebertaV3TextClassifier as DebertaV3Classifier, + DebertaV3TextClassifier as DebertaV3TextClassifier, ) from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import ( - DebertaV3TextClassifierPreprocessor, + DebertaV3TextClassifierPreprocessor as DebertaV3Preprocessor, ) from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import ( - DebertaV3TextClassifierPreprocessor as DebertaV3Preprocessor, + DebertaV3TextClassifierPreprocessor as DebertaV3TextClassifierPreprocessor, ) from keras_hub.src.models.deberta_v3.deberta_v3_tokenizer import ( - DebertaV3Tokenizer, + DebertaV3Tokenizer as DebertaV3Tokenizer, ) from keras_hub.src.models.deeplab_v3.deeplab_v3_backbone import ( - DeepLabV3Backbone, + DeepLabV3Backbone as DeepLabV3Backbone, ) from keras_hub.src.models.deeplab_v3.deeplab_v3_image_segmeter_preprocessor import ( - DeepLabV3ImageSegmenterPreprocessor, + DeepLabV3ImageSegmenterPreprocessor as DeepLabV3ImageSegmenterPreprocessor, ) from keras_hub.src.models.deeplab_v3.deeplab_v3_segmenter import ( - DeepLabV3ImageSegmenter, + DeepLabV3ImageSegmenter as DeepLabV3ImageSegmenter, +) +from keras_hub.src.models.deit.deit_backbone import DeiTBackbone as DeiTBackbone +from keras_hub.src.models.deit.deit_image_classifier import ( + DeiTImageClassifier as DeiTImageClassifier, ) -from keras_hub.src.models.deit.deit_backbone import DeiTBackbone -from keras_hub.src.models.deit.deit_image_classifier import DeiTImageClassifier from keras_hub.src.models.deit.deit_image_classifier_preprocessor import ( - DeiTImageClassifierPreprocessor, + DeiTImageClassifierPreprocessor as DeiTImageClassifierPreprocessor, +) +from keras_hub.src.models.densenet.densenet_backbone import ( + DenseNetBackbone as DenseNetBackbone, ) -from keras_hub.src.models.densenet.densenet_backbone import DenseNetBackbone from keras_hub.src.models.densenet.densenet_image_classifier import ( - DenseNetImageClassifier, + DenseNetImageClassifier as DenseNetImageClassifier, ) from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import ( - DenseNetImageClassifierPreprocessor, + DenseNetImageClassifierPreprocessor as DenseNetImageClassifierPreprocessor, ) from keras_hub.src.models.depth_anything.depth_anything_backbone import ( - DepthAnythingBackbone, + DepthAnythingBackbone as DepthAnythingBackbone, ) from keras_hub.src.models.depth_anything.depth_anything_depth_estimator import ( - DepthAnythingDepthEstimator, + DepthAnythingDepthEstimator as DepthAnythingDepthEstimator, ) from keras_hub.src.models.depth_anything.depth_anything_depth_estimator_preprocessor import ( - DepthAnythingDepthEstimatorPreprocessor, + DepthAnythingDepthEstimatorPreprocessor as DepthAnythingDepthEstimatorPreprocessor, +) +from keras_hub.src.models.depth_estimator import ( + DepthEstimator as DepthEstimator, ) -from keras_hub.src.models.depth_estimator import DepthEstimator from keras_hub.src.models.depth_estimator_preprocessor import ( - DepthEstimatorPreprocessor, + DepthEstimatorPreprocessor as DepthEstimatorPreprocessor, +) +from keras_hub.src.models.dinov2.dinov2_backbone import ( + DINOV2Backbone as 
DINOV2Backbone, ) -from keras_hub.src.models.dinov2.dinov2_backbone import DINOV2Backbone from keras_hub.src.models.distil_bert.distil_bert_backbone import ( - DistilBertBackbone, + DistilBertBackbone as DistilBertBackbone, ) from keras_hub.src.models.distil_bert.distil_bert_masked_lm import ( - DistilBertMaskedLM, + DistilBertMaskedLM as DistilBertMaskedLM, ) from keras_hub.src.models.distil_bert.distil_bert_masked_lm_preprocessor import ( - DistilBertMaskedLMPreprocessor, + DistilBertMaskedLMPreprocessor as DistilBertMaskedLMPreprocessor, ) from keras_hub.src.models.distil_bert.distil_bert_text_classifier import ( - DistilBertTextClassifier, + DistilBertTextClassifier as DistilBertClassifier, ) from keras_hub.src.models.distil_bert.distil_bert_text_classifier import ( - DistilBertTextClassifier as DistilBertClassifier, + DistilBertTextClassifier as DistilBertTextClassifier, ) from keras_hub.src.models.distil_bert.distil_bert_text_classifier_preprocessor import ( - DistilBertTextClassifierPreprocessor, + DistilBertTextClassifierPreprocessor as DistilBertPreprocessor, ) from keras_hub.src.models.distil_bert.distil_bert_text_classifier_preprocessor import ( - DistilBertTextClassifierPreprocessor as DistilBertPreprocessor, + DistilBertTextClassifierPreprocessor as DistilBertTextClassifierPreprocessor, ) from keras_hub.src.models.distil_bert.distil_bert_tokenizer import ( - DistilBertTokenizer, + DistilBertTokenizer as DistilBertTokenizer, ) from keras_hub.src.models.efficientnet.efficientnet_backbone import ( - EfficientNetBackbone, + EfficientNetBackbone as EfficientNetBackbone, ) from keras_hub.src.models.efficientnet.efficientnet_image_classifier import ( - EfficientNetImageClassifier, + EfficientNetImageClassifier as EfficientNetImageClassifier, ) from keras_hub.src.models.efficientnet.efficientnet_image_classifier_preprocessor import ( - EfficientNetImageClassifierPreprocessor, + EfficientNetImageClassifierPreprocessor as EfficientNetImageClassifierPreprocessor, +) +from keras_hub.src.models.electra.electra_backbone import ( + ElectraBackbone as ElectraBackbone, +) +from keras_hub.src.models.electra.electra_tokenizer import ( + ElectraTokenizer as ElectraTokenizer, ) -from keras_hub.src.models.electra.electra_backbone import ElectraBackbone -from keras_hub.src.models.electra.electra_tokenizer import ElectraTokenizer -from keras_hub.src.models.esm.esm_backbone import ESMBackbone from keras_hub.src.models.esm.esm_backbone import ESMBackbone as ESM2Backbone -from keras_hub.src.models.esm.esm_classifier import ESMProteinClassifier +from keras_hub.src.models.esm.esm_backbone import ESMBackbone as ESMBackbone +from keras_hub.src.models.esm.esm_classifier import ( + ESMProteinClassifier as ESMProteinClassifier, +) from keras_hub.src.models.esm.esm_classifier_preprocessor import ( - ESMProteinClassifierPreprocessor, + ESMProteinClassifierPreprocessor as ESMProteinClassifierPreprocessor, ) -from keras_hub.src.models.esm.esm_masked_plm import ESMMaskedPLM from keras_hub.src.models.esm.esm_masked_plm import ( ESMMaskedPLM as ESM2MaskedPLM, ) +from keras_hub.src.models.esm.esm_masked_plm import ESMMaskedPLM as ESMMaskedPLM from keras_hub.src.models.esm.esm_masked_plm_preprocessor import ( - ESMMaskedPLMPreprocessor, + ESMMaskedPLMPreprocessor as ESMMaskedPLMPreprocessor, +) +from keras_hub.src.models.esm.esm_tokenizer import ESMTokenizer as ESMTokenizer +from keras_hub.src.models.f_net.f_net_backbone import ( + FNetBackbone as FNetBackbone, +) +from keras_hub.src.models.f_net.f_net_masked_lm import 
( + FNetMaskedLM as FNetMaskedLM, ) -from keras_hub.src.models.esm.esm_tokenizer import ESMTokenizer -from keras_hub.src.models.f_net.f_net_backbone import FNetBackbone -from keras_hub.src.models.f_net.f_net_masked_lm import FNetMaskedLM from keras_hub.src.models.f_net.f_net_masked_lm_preprocessor import ( - FNetMaskedLMPreprocessor, + FNetMaskedLMPreprocessor as FNetMaskedLMPreprocessor, ) -from keras_hub.src.models.f_net.f_net_text_classifier import FNetTextClassifier from keras_hub.src.models.f_net.f_net_text_classifier import ( FNetTextClassifier as FNetClassifier, ) -from keras_hub.src.models.f_net.f_net_text_classifier_preprocessor import ( - FNetTextClassifierPreprocessor, +from keras_hub.src.models.f_net.f_net_text_classifier import ( + FNetTextClassifier as FNetTextClassifier, ) from keras_hub.src.models.f_net.f_net_text_classifier_preprocessor import ( FNetTextClassifierPreprocessor as FNetPreprocessor, ) -from keras_hub.src.models.f_net.f_net_tokenizer import FNetTokenizer -from keras_hub.src.models.falcon.falcon_backbone import FalconBackbone -from keras_hub.src.models.falcon.falcon_causal_lm import FalconCausalLM +from keras_hub.src.models.f_net.f_net_text_classifier_preprocessor import ( + FNetTextClassifierPreprocessor as FNetTextClassifierPreprocessor, +) +from keras_hub.src.models.f_net.f_net_tokenizer import ( + FNetTokenizer as FNetTokenizer, +) +from keras_hub.src.models.falcon.falcon_backbone import ( + FalconBackbone as FalconBackbone, +) +from keras_hub.src.models.falcon.falcon_causal_lm import ( + FalconCausalLM as FalconCausalLM, +) from keras_hub.src.models.falcon.falcon_causal_lm_preprocessor import ( - FalconCausalLMPreprocessor, + FalconCausalLMPreprocessor as FalconCausalLMPreprocessor, +) +from keras_hub.src.models.falcon.falcon_tokenizer import ( + FalconTokenizer as FalconTokenizer, +) +from keras_hub.src.models.feature_pyramid_backbone import ( + FeaturePyramidBackbone as FeaturePyramidBackbone, +) +from keras_hub.src.models.flux.flux_model import FluxBackbone as FluxBackbone +from keras_hub.src.models.flux.flux_text_to_image import ( + FluxTextToImage as FluxTextToImage, ) -from keras_hub.src.models.falcon.falcon_tokenizer import FalconTokenizer -from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone -from keras_hub.src.models.flux.flux_model import FluxBackbone -from keras_hub.src.models.flux.flux_text_to_image import FluxTextToImage from keras_hub.src.models.flux.flux_text_to_image_preprocessor import ( - FluxTextToImagePreprocessor, + FluxTextToImagePreprocessor as FluxTextToImagePreprocessor, +) +from keras_hub.src.models.gemma.gemma_backbone import ( + GemmaBackbone as GemmaBackbone, +) +from keras_hub.src.models.gemma.gemma_causal_lm import ( + GemmaCausalLM as GemmaCausalLM, ) -from keras_hub.src.models.gemma.gemma_backbone import GemmaBackbone -from keras_hub.src.models.gemma.gemma_causal_lm import GemmaCausalLM from keras_hub.src.models.gemma.gemma_causal_lm_preprocessor import ( - GemmaCausalLMPreprocessor, + GemmaCausalLMPreprocessor as GemmaCausalLMPreprocessor, +) +from keras_hub.src.models.gemma.gemma_tokenizer import ( + GemmaTokenizer as GemmaTokenizer, +) +from keras_hub.src.models.gemma3.gemma3_backbone import ( + Gemma3Backbone as Gemma3Backbone, +) +from keras_hub.src.models.gemma3.gemma3_causal_lm import ( + Gemma3CausalLM as Gemma3CausalLM, ) -from keras_hub.src.models.gemma.gemma_tokenizer import GemmaTokenizer -from keras_hub.src.models.gemma3.gemma3_backbone import Gemma3Backbone -from 
keras_hub.src.models.gemma3.gemma3_causal_lm import Gemma3CausalLM from keras_hub.src.models.gemma3.gemma3_causal_lm_preprocessor import ( - Gemma3CausalLMPreprocessor, + Gemma3CausalLMPreprocessor as Gemma3CausalLMPreprocessor, +) +from keras_hub.src.models.gemma3.gemma3_tokenizer import ( + Gemma3Tokenizer as Gemma3Tokenizer, ) -from keras_hub.src.models.gemma3.gemma3_tokenizer import Gemma3Tokenizer from keras_hub.src.models.gemma3.gemma3_vision_encoder import ( - Gemma3VisionEncoder, + Gemma3VisionEncoder as Gemma3VisionEncoder, +) +from keras_hub.src.models.gpt2.gpt2_backbone import GPT2Backbone as GPT2Backbone +from keras_hub.src.models.gpt2.gpt2_causal_lm import ( + GPT2CausalLM as GPT2CausalLM, ) -from keras_hub.src.models.gpt2.gpt2_backbone import GPT2Backbone -from keras_hub.src.models.gpt2.gpt2_causal_lm import GPT2CausalLM from keras_hub.src.models.gpt2.gpt2_causal_lm_preprocessor import ( - GPT2CausalLMPreprocessor, + GPT2CausalLMPreprocessor as GPT2CausalLMPreprocessor, +) +from keras_hub.src.models.gpt2.gpt2_preprocessor import ( + GPT2Preprocessor as GPT2Preprocessor, +) +from keras_hub.src.models.gpt2.gpt2_tokenizer import ( + GPT2Tokenizer as GPT2Tokenizer, +) +from keras_hub.src.models.gpt_neo_x.gpt_neo_x_backbone import ( + GPTNeoXBackbone as GPTNeoXBackbone, +) +from keras_hub.src.models.gpt_neo_x.gpt_neo_x_causal_lm import ( + GPTNeoXCausalLM as GPTNeoXCausalLM, ) -from keras_hub.src.models.gpt2.gpt2_preprocessor import GPT2Preprocessor -from keras_hub.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer -from keras_hub.src.models.gpt_neo_x.gpt_neo_x_backbone import GPTNeoXBackbone -from keras_hub.src.models.gpt_neo_x.gpt_neo_x_causal_lm import GPTNeoXCausalLM from keras_hub.src.models.gpt_neo_x.gpt_neo_x_causal_lm_preprocessor import ( - GPTNeoXCausalLMPreprocessor, + GPTNeoXCausalLMPreprocessor as GPTNeoXCausalLMPreprocessor, +) +from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import ( + GPTNeoXTokenizer as GPTNeoXTokenizer, +) +from keras_hub.src.models.hgnetv2.hgnetv2_backbone import ( + HGNetV2Backbone as HGNetV2Backbone, ) -from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer -from keras_hub.src.models.hgnetv2.hgnetv2_backbone import HGNetV2Backbone from keras_hub.src.models.hgnetv2.hgnetv2_image_classifier import ( - HGNetV2ImageClassifier, + HGNetV2ImageClassifier as HGNetV2ImageClassifier, ) from keras_hub.src.models.hgnetv2.hgnetv2_image_classifier_preprocessor import ( - HGNetV2ImageClassifierPreprocessor, + HGNetV2ImageClassifierPreprocessor as HGNetV2ImageClassifierPreprocessor, +) +from keras_hub.src.models.image_classifier import ( + ImageClassifier as ImageClassifier, ) -from keras_hub.src.models.image_classifier import ImageClassifier from keras_hub.src.models.image_classifier_preprocessor import ( - ImageClassifierPreprocessor, + ImageClassifierPreprocessor as ImageClassifierPreprocessor, +) +from keras_hub.src.models.image_segmenter import ( + ImageSegmenter as ImageSegmenter, ) -from keras_hub.src.models.image_segmenter import ImageSegmenter from keras_hub.src.models.image_segmenter_preprocessor import ( - ImageSegmenterPreprocessor, + ImageSegmenterPreprocessor as ImageSegmenterPreprocessor, +) +from keras_hub.src.models.image_to_image import ImageToImage as ImageToImage +from keras_hub.src.models.inpaint import Inpaint as Inpaint +from keras_hub.src.models.llama.llama_backbone import ( + LlamaBackbone as LlamaBackbone, +) +from keras_hub.src.models.llama.llama_causal_lm import ( + LlamaCausalLM as LlamaCausalLM, 
) -from keras_hub.src.models.image_to_image import ImageToImage -from keras_hub.src.models.inpaint import Inpaint -from keras_hub.src.models.llama.llama_backbone import LlamaBackbone -from keras_hub.src.models.llama.llama_causal_lm import LlamaCausalLM from keras_hub.src.models.llama.llama_causal_lm_preprocessor import ( - LlamaCausalLMPreprocessor, + LlamaCausalLMPreprocessor as LlamaCausalLMPreprocessor, +) +from keras_hub.src.models.llama.llama_tokenizer import ( + LlamaTokenizer as LlamaTokenizer, +) +from keras_hub.src.models.llama3.llama3_backbone import ( + Llama3Backbone as Llama3Backbone, +) +from keras_hub.src.models.llama3.llama3_causal_lm import ( + Llama3CausalLM as Llama3CausalLM, ) -from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer -from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone -from keras_hub.src.models.llama3.llama3_causal_lm import Llama3CausalLM from keras_hub.src.models.llama3.llama3_causal_lm_preprocessor import ( - Llama3CausalLMPreprocessor, + Llama3CausalLMPreprocessor as Llama3CausalLMPreprocessor, +) +from keras_hub.src.models.llama3.llama3_tokenizer import ( + Llama3Tokenizer as Llama3Tokenizer, +) +from keras_hub.src.models.masked_lm import MaskedLM as MaskedLM +from keras_hub.src.models.masked_lm_preprocessor import ( + MaskedLMPreprocessor as MaskedLMPreprocessor, +) +from keras_hub.src.models.mistral.mistral_backbone import ( + MistralBackbone as MistralBackbone, +) +from keras_hub.src.models.mistral.mistral_causal_lm import ( + MistralCausalLM as MistralCausalLM, ) -from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer -from keras_hub.src.models.masked_lm import MaskedLM -from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor -from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone -from keras_hub.src.models.mistral.mistral_causal_lm import MistralCausalLM from keras_hub.src.models.mistral.mistral_causal_lm_preprocessor import ( - MistralCausalLMPreprocessor, + MistralCausalLMPreprocessor as MistralCausalLMPreprocessor, +) +from keras_hub.src.models.mistral.mistral_tokenizer import ( + MistralTokenizer as MistralTokenizer, +) +from keras_hub.src.models.mit.mit_backbone import MiTBackbone as MiTBackbone +from keras_hub.src.models.mit.mit_image_classifier import ( + MiTImageClassifier as MiTImageClassifier, ) -from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer -from keras_hub.src.models.mit.mit_backbone import MiTBackbone -from keras_hub.src.models.mit.mit_image_classifier import MiTImageClassifier from keras_hub.src.models.mit.mit_image_classifier_preprocessor import ( - MiTImageClassifierPreprocessor, + MiTImageClassifierPreprocessor as MiTImageClassifierPreprocessor, +) +from keras_hub.src.models.mixtral.mixtral_backbone import ( + MixtralBackbone as MixtralBackbone, +) +from keras_hub.src.models.mixtral.mixtral_causal_lm import ( + MixtralCausalLM as MixtralCausalLM, ) -from keras_hub.src.models.mixtral.mixtral_backbone import MixtralBackbone -from keras_hub.src.models.mixtral.mixtral_causal_lm import MixtralCausalLM from keras_hub.src.models.mixtral.mixtral_causal_lm_preprocessor import ( - MixtralCausalLMPreprocessor, + MixtralCausalLMPreprocessor as MixtralCausalLMPreprocessor, +) +from keras_hub.src.models.mixtral.mixtral_tokenizer import ( + MixtralTokenizer as MixtralTokenizer, +) +from keras_hub.src.models.mobilenet.mobilenet_backbone import ( + MobileNetBackbone as MobileNetBackbone, ) -from 
keras_hub.src.models.mixtral.mixtral_tokenizer import MixtralTokenizer -from keras_hub.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone from keras_hub.src.models.mobilenet.mobilenet_image_classifier import ( - MobileNetImageClassifier, + MobileNetImageClassifier as MobileNetImageClassifier, ) from keras_hub.src.models.mobilenet.mobilenet_image_classifier_preprocessor import ( - MobileNetImageClassifierPreprocessor, + MobileNetImageClassifierPreprocessor as MobileNetImageClassifierPreprocessor, ) from keras_hub.src.models.moonshine.moonshine_audio_to_text import ( - MoonshineAudioToText, + MoonshineAudioToText as MoonshineAudioToText, ) from keras_hub.src.models.moonshine.moonshine_audio_to_text_preprocessor import ( - MoonshineAudioToTextPreprocessor, + MoonshineAudioToTextPreprocessor as MoonshineAudioToTextPreprocessor, +) +from keras_hub.src.models.moonshine.moonshine_backbone import ( + MoonshineBackbone as MoonshineBackbone, ) -from keras_hub.src.models.moonshine.moonshine_backbone import MoonshineBackbone from keras_hub.src.models.moonshine.moonshine_tokenizer import ( - MoonshineTokenizer, + MoonshineTokenizer as MoonshineTokenizer, ) -from keras_hub.src.models.object_detector import ObjectDetector from keras_hub.src.models.object_detector import ( ObjectDetector as ImageObjectDetector, ) -from keras_hub.src.models.object_detector_preprocessor import ( - ObjectDetectorPreprocessor, +from keras_hub.src.models.object_detector import ( + ObjectDetector as ObjectDetector, ) from keras_hub.src.models.object_detector_preprocessor import ( ObjectDetectorPreprocessor as ImageObjectDetectorPreprocessor, ) -from keras_hub.src.models.opt.opt_backbone import OPTBackbone -from keras_hub.src.models.opt.opt_causal_lm import OPTCausalLM +from keras_hub.src.models.object_detector_preprocessor import ( + ObjectDetectorPreprocessor as ObjectDetectorPreprocessor, +) +from keras_hub.src.models.opt.opt_backbone import OPTBackbone as OPTBackbone +from keras_hub.src.models.opt.opt_causal_lm import OPTCausalLM as OPTCausalLM from keras_hub.src.models.opt.opt_causal_lm_preprocessor import ( - OPTCausalLMPreprocessor, + OPTCausalLMPreprocessor as OPTCausalLMPreprocessor, ) -from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer +from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer as OPTTokenizer from keras_hub.src.models.pali_gemma.pali_gemma_backbone import ( - PaliGemmaBackbone, + PaliGemmaBackbone as PaliGemmaBackbone, ) from keras_hub.src.models.pali_gemma.pali_gemma_causal_lm import ( - PaliGemmaCausalLM, + PaliGemmaCausalLM as PaliGemmaCausalLM, ) from keras_hub.src.models.pali_gemma.pali_gemma_causal_lm_preprocessor import ( - PaliGemmaCausalLMPreprocessor, + PaliGemmaCausalLMPreprocessor as PaliGemmaCausalLMPreprocessor, ) from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import ( - PaliGemmaTokenizer, + PaliGemmaTokenizer as PaliGemmaTokenizer, +) +from keras_hub.src.models.parseq.parseq_backbone import ( + PARSeqBackbone as PARSeqBackbone, +) +from keras_hub.src.models.parseq.parseq_causal_lm import ( + PARSeqCausalLM as PARSeqCausalLM, ) -from keras_hub.src.models.parseq.parseq_backbone import PARSeqBackbone -from keras_hub.src.models.parseq.parseq_causal_lm import PARSeqCausalLM from keras_hub.src.models.parseq.parseq_causal_lm_preprocessor import ( - PARSeqCausalLMPreprocessor, + PARSeqCausalLMPreprocessor as PARSeqCausalLMPreprocessor, +) +from keras_hub.src.models.parseq.parseq_tokenizer import ( + PARSeqTokenizer as PARSeqTokenizer, +) +from 
keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone as Phi3Backbone +from keras_hub.src.models.phi3.phi3_causal_lm import ( + Phi3CausalLM as Phi3CausalLM, ) -from keras_hub.src.models.parseq.parseq_tokenizer import PARSeqTokenizer -from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone -from keras_hub.src.models.phi3.phi3_causal_lm import Phi3CausalLM from keras_hub.src.models.phi3.phi3_causal_lm_preprocessor import ( - Phi3CausalLMPreprocessor, + Phi3CausalLMPreprocessor as Phi3CausalLMPreprocessor, ) -from keras_hub.src.models.phi3.phi3_tokenizer import Phi3Tokenizer -from keras_hub.src.models.preprocessor import Preprocessor -from keras_hub.src.models.qwen.qwen_backbone import QwenBackbone +from keras_hub.src.models.phi3.phi3_tokenizer import ( + Phi3Tokenizer as Phi3Tokenizer, +) +from keras_hub.src.models.preprocessor import Preprocessor as Preprocessor from keras_hub.src.models.qwen.qwen_backbone import ( QwenBackbone as Qwen2Backbone, ) -from keras_hub.src.models.qwen.qwen_causal_lm import QwenCausalLM +from keras_hub.src.models.qwen.qwen_backbone import QwenBackbone as QwenBackbone from keras_hub.src.models.qwen.qwen_causal_lm import ( QwenCausalLM as Qwen2CausalLM, ) -from keras_hub.src.models.qwen.qwen_causal_lm_preprocessor import ( - QwenCausalLMPreprocessor, +from keras_hub.src.models.qwen.qwen_causal_lm import ( + QwenCausalLM as QwenCausalLM, ) from keras_hub.src.models.qwen.qwen_causal_lm_preprocessor import ( QwenCausalLMPreprocessor as Qwen2CausalLMPreprocessor, ) -from keras_hub.src.models.qwen.qwen_tokenizer import QwenTokenizer +from keras_hub.src.models.qwen.qwen_causal_lm_preprocessor import ( + QwenCausalLMPreprocessor as QwenCausalLMPreprocessor, +) from keras_hub.src.models.qwen.qwen_tokenizer import ( QwenTokenizer as Qwen2Tokenizer, ) -from keras_hub.src.models.qwen3.qwen3_backbone import Qwen3Backbone -from keras_hub.src.models.qwen3.qwen3_causal_lm import Qwen3CausalLM +from keras_hub.src.models.qwen.qwen_tokenizer import ( + QwenTokenizer as QwenTokenizer, +) +from keras_hub.src.models.qwen3.qwen3_backbone import ( + Qwen3Backbone as Qwen3Backbone, +) +from keras_hub.src.models.qwen3.qwen3_causal_lm import ( + Qwen3CausalLM as Qwen3CausalLM, +) from keras_hub.src.models.qwen3.qwen3_causal_lm_preprocessor import ( - Qwen3CausalLMPreprocessor, + Qwen3CausalLMPreprocessor as Qwen3CausalLMPreprocessor, +) +from keras_hub.src.models.qwen3.qwen3_tokenizer import ( + Qwen3Tokenizer as Qwen3Tokenizer, +) +from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import ( + Qwen3MoeBackbone as Qwen3MoeBackbone, +) +from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm import ( + Qwen3MoeCausalLM as Qwen3MoeCausalLM, ) -from keras_hub.src.models.qwen3.qwen3_tokenizer import Qwen3Tokenizer -from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import Qwen3MoeBackbone -from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm import Qwen3MoeCausalLM from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm_preprocessor import ( - Qwen3MoeCausalLMPreprocessor, + Qwen3MoeCausalLMPreprocessor as Qwen3MoeCausalLMPreprocessor, +) +from keras_hub.src.models.qwen_moe.qwen_moe_backbone import ( + QwenMoeBackbone as QwenMoeBackbone, +) +from keras_hub.src.models.qwen_moe.qwen_moe_causal_lm import ( + QwenMoeCausalLM as QwenMoeCausalLM, ) -from keras_hub.src.models.qwen_moe.qwen_moe_backbone import QwenMoeBackbone -from keras_hub.src.models.qwen_moe.qwen_moe_causal_lm import QwenMoeCausalLM from keras_hub.src.models.qwen_moe.qwen_moe_causal_lm_preprocessor import ( 
- QwenMoeCausalLMPreprocessor, + QwenMoeCausalLMPreprocessor as QwenMoeCausalLMPreprocessor, +) +from keras_hub.src.models.resnet.resnet_backbone import ( + ResNetBackbone as ResNetBackbone, ) -from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone from keras_hub.src.models.resnet.resnet_image_classifier import ( - ResNetImageClassifier, + ResNetImageClassifier as ResNetImageClassifier, ) from keras_hub.src.models.resnet.resnet_image_classifier_preprocessor import ( - ResNetImageClassifierPreprocessor, + ResNetImageClassifierPreprocessor as ResNetImageClassifierPreprocessor, +) +from keras_hub.src.models.retinanet.retinanet_backbone import ( + RetinaNetBackbone as RetinaNetBackbone, ) -from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone from keras_hub.src.models.retinanet.retinanet_object_detector import ( - RetinaNetObjectDetector, + RetinaNetObjectDetector as RetinaNetObjectDetector, ) from keras_hub.src.models.retinanet.retinanet_object_detector_preprocessor import ( - RetinaNetObjectDetectorPreprocessor, + RetinaNetObjectDetectorPreprocessor as RetinaNetObjectDetectorPreprocessor, +) +from keras_hub.src.models.roberta.roberta_backbone import ( + RobertaBackbone as RobertaBackbone, +) +from keras_hub.src.models.roberta.roberta_masked_lm import ( + RobertaMaskedLM as RobertaMaskedLM, ) -from keras_hub.src.models.roberta.roberta_backbone import RobertaBackbone -from keras_hub.src.models.roberta.roberta_masked_lm import RobertaMaskedLM from keras_hub.src.models.roberta.roberta_masked_lm_preprocessor import ( - RobertaMaskedLMPreprocessor, + RobertaMaskedLMPreprocessor as RobertaMaskedLMPreprocessor, ) from keras_hub.src.models.roberta.roberta_text_classifier import ( - RobertaTextClassifier, + RobertaTextClassifier as RobertaClassifier, ) from keras_hub.src.models.roberta.roberta_text_classifier import ( - RobertaTextClassifier as RobertaClassifier, + RobertaTextClassifier as RobertaTextClassifier, ) from keras_hub.src.models.roberta.roberta_text_classifier_preprocessor import ( - RobertaTextClassifierPreprocessor, + RobertaTextClassifierPreprocessor as RobertaPreprocessor, ) from keras_hub.src.models.roberta.roberta_text_classifier_preprocessor import ( - RobertaTextClassifierPreprocessor as RobertaPreprocessor, + RobertaTextClassifierPreprocessor as RobertaTextClassifierPreprocessor, +) +from keras_hub.src.models.roberta.roberta_tokenizer import ( + RobertaTokenizer as RobertaTokenizer, ) -from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer from keras_hub.src.models.roformer_v2.roformer_v2_backbone import ( - RoformerV2Backbone, + RoformerV2Backbone as RoformerV2Backbone, ) from keras_hub.src.models.roformer_v2.roformer_v2_masked_lm import ( - RoformerV2MaskedLM, + RoformerV2MaskedLM as RoformerV2MaskedLM, ) from keras_hub.src.models.roformer_v2.roformer_v2_masked_lm_preprocessor import ( - RoformerV2MaskedLMPreprocessor, + RoformerV2MaskedLMPreprocessor as RoformerV2MaskedLMPreprocessor, ) from keras_hub.src.models.roformer_v2.roformer_v2_text_classifier import ( - RoformerV2TextClassifier, + RoformerV2TextClassifier as RoformerV2TextClassifier, ) from keras_hub.src.models.roformer_v2.roformer_v2_text_classifier_preprocessor import ( - RoformerV2TextClassifierPreprocessor, + RoformerV2TextClassifierPreprocessor as RoformerV2TextClassifierPreprocessor, ) from keras_hub.src.models.roformer_v2.roformer_v2_tokenizer import ( - RoformerV2Tokenizer, + RoformerV2Tokenizer as RoformerV2Tokenizer, +) +from 
keras_hub.src.models.rwkv7.rwkv7_backbone import ( + RWKV7Backbone as RWKV7Backbone, +) +from keras_hub.src.models.rwkv7.rwkv7_causal_lm import ( + RWKV7CausalLM as RWKV7CausalLM, ) -from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone -from keras_hub.src.models.rwkv7.rwkv7_causal_lm import RWKV7CausalLM from keras_hub.src.models.rwkv7.rwkv7_causal_lm_preprocessor import ( - RWKV7CausalLMPreprocessor, + RWKV7CausalLMPreprocessor as RWKV7CausalLMPreprocessor, +) +from keras_hub.src.models.sam.sam_backbone import SAMBackbone as SAMBackbone +from keras_hub.src.models.sam.sam_image_segmenter import ( + SAMImageSegmenter as SAMImageSegmenter, ) -from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer -from keras_hub.src.models.sam.sam_backbone import SAMBackbone -from keras_hub.src.models.sam.sam_image_segmenter import SAMImageSegmenter from keras_hub.src.models.sam.sam_image_segmenter_preprocessor import ( - SAMImageSegmenterPreprocessor, + SAMImageSegmenterPreprocessor as SAMImageSegmenterPreprocessor, +) +from keras_hub.src.models.segformer.segformer_backbone import ( + SegFormerBackbone as SegFormerBackbone, ) -from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone from keras_hub.src.models.segformer.segformer_image_segmenter import ( - SegFormerImageSegmenter, + SegFormerImageSegmenter as SegFormerImageSegmenter, ) from keras_hub.src.models.segformer.segformer_image_segmenter_preprocessor import ( - SegFormerImageSegmenterPreprocessor, -) -from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM -from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor -from keras_hub.src.models.siglip.siglip_backbone import SigLIPBackbone -from keras_hub.src.models.siglip.siglip_preprocessor import SigLIPPreprocessor -from keras_hub.src.models.siglip.siglip_text_encoder import SigLIPTextEncoder -from keras_hub.src.models.siglip.siglip_tokenizer import SigLIPTokenizer + SegFormerImageSegmenterPreprocessor as SegFormerImageSegmenterPreprocessor, +) +from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM as Seq2SeqLM +from keras_hub.src.models.seq_2_seq_lm_preprocessor import ( + Seq2SeqLMPreprocessor as Seq2SeqLMPreprocessor, +) +from keras_hub.src.models.siglip.siglip_backbone import ( + SigLIPBackbone as SigLIPBackbone, +) +from keras_hub.src.models.siglip.siglip_preprocessor import ( + SigLIPPreprocessor as SigLIPPreprocessor, +) +from keras_hub.src.models.siglip.siglip_text_encoder import ( + SigLIPTextEncoder as SigLIPTextEncoder, +) +from keras_hub.src.models.siglip.siglip_tokenizer import ( + SigLIPTokenizer as SigLIPTokenizer, +) from keras_hub.src.models.siglip.siglip_vision_encoder import ( - SigLIPVisionEncoder, + SigLIPVisionEncoder as SigLIPVisionEncoder, ) from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import ( - StableDiffusion3Backbone, + StableDiffusion3Backbone as StableDiffusion3Backbone, ) from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_image_to_image import ( - StableDiffusion3ImageToImage, + StableDiffusion3ImageToImage as StableDiffusion3ImageToImage, ) from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_inpaint import ( - StableDiffusion3Inpaint, + StableDiffusion3Inpaint as StableDiffusion3Inpaint, ) from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_text_to_image import ( - StableDiffusion3TextToImage, + StableDiffusion3TextToImage as StableDiffusion3TextToImage, ) from 
keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_text_to_image_preprocessor import ( - StableDiffusion3TextToImagePreprocessor, + StableDiffusion3TextToImagePreprocessor as StableDiffusion3TextToImagePreprocessor, +) +from keras_hub.src.models.t5.t5_backbone import T5Backbone as T5Backbone +from keras_hub.src.models.t5.t5_preprocessor import ( + T5Preprocessor as T5Preprocessor, +) +from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer as T5Tokenizer +from keras_hub.src.models.t5gemma.t5gemma_backbone import ( + T5GemmaBackbone as T5GemmaBackbone, +) +from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm import ( + T5GemmaSeq2SeqLM as T5GemmaSeq2SeqLM, ) -from keras_hub.src.models.t5.t5_backbone import T5Backbone -from keras_hub.src.models.t5.t5_preprocessor import T5Preprocessor -from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer -from keras_hub.src.models.t5gemma.t5gemma_backbone import T5GemmaBackbone -from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm import T5GemmaSeq2SeqLM from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm_preprocessor import ( - T5GemmaSeq2SeqLMPreprocessor, + T5GemmaSeq2SeqLMPreprocessor as T5GemmaSeq2SeqLMPreprocessor, ) -from keras_hub.src.models.t5gemma.t5gemma_tokenizer import T5GemmaTokenizer -from keras_hub.src.models.task import Task -from keras_hub.src.models.text_classifier import TextClassifier +from keras_hub.src.models.t5gemma.t5gemma_tokenizer import ( + T5GemmaTokenizer as T5GemmaTokenizer, +) +from keras_hub.src.models.task import Task as Task from keras_hub.src.models.text_classifier import TextClassifier as Classifier +from keras_hub.src.models.text_classifier import ( + TextClassifier as TextClassifier, +) from keras_hub.src.models.text_classifier_preprocessor import ( - TextClassifierPreprocessor, + TextClassifierPreprocessor as TextClassifierPreprocessor, ) -from keras_hub.src.models.text_to_image import TextToImage +from keras_hub.src.models.text_to_image import TextToImage as TextToImage from keras_hub.src.models.text_to_image_preprocessor import ( - TextToImagePreprocessor, + TextToImagePreprocessor as TextToImagePreprocessor, +) +from keras_hub.src.models.vgg.vgg_backbone import VGGBackbone as VGGBackbone +from keras_hub.src.models.vgg.vgg_image_classifier import ( + VGGImageClassifier as VGGImageClassifier, ) -from keras_hub.src.models.vgg.vgg_backbone import VGGBackbone -from keras_hub.src.models.vgg.vgg_image_classifier import VGGImageClassifier from keras_hub.src.models.vgg.vgg_image_classifier_preprocessor import ( - VGGImageClassifierPreprocessor, + VGGImageClassifierPreprocessor as VGGImageClassifierPreprocessor, +) +from keras_hub.src.models.vit.vit_backbone import ViTBackbone as ViTBackbone +from keras_hub.src.models.vit.vit_image_classifier import ( + ViTImageClassifier as ViTImageClassifier, ) -from keras_hub.src.models.vit.vit_backbone import ViTBackbone -from keras_hub.src.models.vit.vit_image_classifier import ViTImageClassifier from keras_hub.src.models.vit.vit_image_classifier_preprocessor import ( - ViTImageClassifierPreprocessor, + ViTImageClassifierPreprocessor as ViTImageClassifierPreprocessor, +) +from keras_hub.src.models.vit_det.vit_det_backbone import ( + ViTDetBackbone as ViTDetBackbone, +) +from keras_hub.src.models.whisper.whisper_backbone import ( + WhisperBackbone as WhisperBackbone, +) +from keras_hub.src.models.whisper.whisper_tokenizer import ( + WhisperTokenizer as WhisperTokenizer, +) +from keras_hub.src.models.xception.xception_backbone import ( + XceptionBackbone as 
XceptionBackbone, ) -from keras_hub.src.models.vit_det.vit_det_backbone import ViTDetBackbone -from keras_hub.src.models.whisper.whisper_backbone import WhisperBackbone -from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer -from keras_hub.src.models.xception.xception_backbone import XceptionBackbone from keras_hub.src.models.xception.xception_image_classifier import ( - XceptionImageClassifier, + XceptionImageClassifier as XceptionImageClassifier, ) from keras_hub.src.models.xception.xception_image_classifier_preprocessor import ( - XceptionImageClassifierPreprocessor, + XceptionImageClassifierPreprocessor as XceptionImageClassifierPreprocessor, ) from keras_hub.src.models.xlm_roberta.xlm_roberta_backbone import ( - XLMRobertaBackbone, + XLMRobertaBackbone as XLMRobertaBackbone, ) from keras_hub.src.models.xlm_roberta.xlm_roberta_masked_lm import ( - XLMRobertaMaskedLM, + XLMRobertaMaskedLM as XLMRobertaMaskedLM, ) from keras_hub.src.models.xlm_roberta.xlm_roberta_masked_lm_preprocessor import ( - XLMRobertaMaskedLMPreprocessor, + XLMRobertaMaskedLMPreprocessor as XLMRobertaMaskedLMPreprocessor, ) from keras_hub.src.models.xlm_roberta.xlm_roberta_text_classifier import ( - XLMRobertaTextClassifier, + XLMRobertaTextClassifier as XLMRobertaClassifier, ) from keras_hub.src.models.xlm_roberta.xlm_roberta_text_classifier import ( - XLMRobertaTextClassifier as XLMRobertaClassifier, + XLMRobertaTextClassifier as XLMRobertaTextClassifier, ) from keras_hub.src.models.xlm_roberta.xlm_roberta_text_classifier_preprocessor import ( - XLMRobertaTextClassifierPreprocessor, + XLMRobertaTextClassifierPreprocessor as XLMRobertaPreprocessor, ) from keras_hub.src.models.xlm_roberta.xlm_roberta_text_classifier_preprocessor import ( - XLMRobertaTextClassifierPreprocessor as XLMRobertaPreprocessor, + XLMRobertaTextClassifierPreprocessor as XLMRobertaTextClassifierPreprocessor, ) from keras_hub.src.models.xlm_roberta.xlm_roberta_tokenizer import ( - XLMRobertaTokenizer, + XLMRobertaTokenizer as XLMRobertaTokenizer, +) +from keras_hub.src.models.xlnet.xlnet_backbone import ( + XLNetBackbone as XLNetBackbone, ) -from keras_hub.src.models.xlnet.xlnet_backbone import XLNetBackbone -from keras_hub.src.tokenizers.tokenizer import Tokenizer +from keras_hub.src.tokenizers.tokenizer import Tokenizer as Tokenizer diff --git a/keras_hub/api/samplers/__init__.py b/keras_hub/api/samplers/__init__.py index 9feb76c669..29bfef00fc 100644 --- a/keras_hub/api/samplers/__init__.py +++ b/keras_hub/api/samplers/__init__.py @@ -4,13 +4,15 @@ since your modifications would be overwritten. 
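Taken together, the model registrations above and the tokenizer registration below expose the new RWKV-7 symbols on the public API. A sketch of the resulting import surface, assuming the package wires the generated `keras_hub.api.*` modules to the top-level namespaces as usual; no released presets are implied:

import keras_hub

# Symbols registered by this patch (see the api diffs above and below):
backbone_cls = keras_hub.models.RWKV7Backbone
lm_cls = keras_hub.models.RWKV7CausalLM
preprocessor_cls = keras_hub.models.RWKV7CausalLMPreprocessor
tokenizer_cls = keras_hub.tokenizers.RWKVTokenizer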
""" -from keras_hub.src.samplers.beam_sampler import BeamSampler -from keras_hub.src.samplers.contrastive_sampler import ContrastiveSampler -from keras_hub.src.samplers.greedy_sampler import GreedySampler -from keras_hub.src.samplers.random_sampler import RandomSampler -from keras_hub.src.samplers.sampler import Sampler -from keras_hub.src.samplers.serialization import deserialize -from keras_hub.src.samplers.serialization import get -from keras_hub.src.samplers.serialization import serialize -from keras_hub.src.samplers.top_k_sampler import TopKSampler -from keras_hub.src.samplers.top_p_sampler import TopPSampler +from keras_hub.src.samplers.beam_sampler import BeamSampler as BeamSampler +from keras_hub.src.samplers.contrastive_sampler import ( + ContrastiveSampler as ContrastiveSampler, +) +from keras_hub.src.samplers.greedy_sampler import GreedySampler as GreedySampler +from keras_hub.src.samplers.random_sampler import RandomSampler as RandomSampler +from keras_hub.src.samplers.sampler import Sampler as Sampler +from keras_hub.src.samplers.serialization import deserialize as deserialize +from keras_hub.src.samplers.serialization import get as get +from keras_hub.src.samplers.serialization import serialize as serialize +from keras_hub.src.samplers.top_k_sampler import TopKSampler as TopKSampler +from keras_hub.src.samplers.top_p_sampler import TopPSampler as TopPSampler diff --git a/keras_hub/api/tokenizers/__init__.py b/keras_hub/api/tokenizers/__init__.py index b13023ef3e..264bc8bdd4 100644 --- a/keras_hub/api/tokenizers/__init__.py +++ b/keras_hub/api/tokenizers/__init__.py @@ -4,69 +4,127 @@ since your modifications would be overwritten. """ -from keras_hub.src.models.albert.albert_tokenizer import AlbertTokenizer -from keras_hub.src.models.bart.bart_tokenizer import BartTokenizer -from keras_hub.src.models.bert.bert_tokenizer import BertTokenizer -from keras_hub.src.models.bloom.bloom_tokenizer import BloomTokenizer -from keras_hub.src.models.clip.clip_tokenizer import CLIPTokenizer +from keras_hub.src.models.albert.albert_tokenizer import ( + AlbertTokenizer as AlbertTokenizer, +) +from keras_hub.src.models.bart.bart_tokenizer import ( + BartTokenizer as BartTokenizer, +) +from keras_hub.src.models.bert.bert_tokenizer import ( + BertTokenizer as BertTokenizer, +) +from keras_hub.src.models.bloom.bloom_tokenizer import ( + BloomTokenizer as BloomTokenizer, +) +from keras_hub.src.models.clip.clip_tokenizer import ( + CLIPTokenizer as CLIPTokenizer, +) from keras_hub.src.models.deberta_v3.deberta_v3_tokenizer import ( - DebertaV3Tokenizer, + DebertaV3Tokenizer as DebertaV3Tokenizer, ) from keras_hub.src.models.distil_bert.distil_bert_tokenizer import ( - DistilBertTokenizer, -) -from keras_hub.src.models.electra.electra_tokenizer import ElectraTokenizer -from keras_hub.src.models.esm.esm_tokenizer import ESMTokenizer -from keras_hub.src.models.f_net.f_net_tokenizer import FNetTokenizer -from keras_hub.src.models.falcon.falcon_tokenizer import FalconTokenizer -from keras_hub.src.models.gemma.gemma_tokenizer import GemmaTokenizer -from keras_hub.src.models.gemma3.gemma3_tokenizer import Gemma3Tokenizer -from keras_hub.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer -from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer -from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer -from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer -from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer -from 
keras_hub.src.models.mixtral.mixtral_tokenizer import MixtralTokenizer + DistilBertTokenizer as DistilBertTokenizer, +) +from keras_hub.src.models.electra.electra_tokenizer import ( + ElectraTokenizer as ElectraTokenizer, +) +from keras_hub.src.models.esm.esm_tokenizer import ESMTokenizer as ESMTokenizer +from keras_hub.src.models.f_net.f_net_tokenizer import ( + FNetTokenizer as FNetTokenizer, +) +from keras_hub.src.models.falcon.falcon_tokenizer import ( + FalconTokenizer as FalconTokenizer, +) +from keras_hub.src.models.gemma.gemma_tokenizer import ( + GemmaTokenizer as GemmaTokenizer, +) +from keras_hub.src.models.gemma3.gemma3_tokenizer import ( + Gemma3Tokenizer as Gemma3Tokenizer, +) +from keras_hub.src.models.gpt2.gpt2_tokenizer import ( + GPT2Tokenizer as GPT2Tokenizer, +) +from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import ( + GPTNeoXTokenizer as GPTNeoXTokenizer, +) +from keras_hub.src.models.llama.llama_tokenizer import ( + LlamaTokenizer as LlamaTokenizer, +) +from keras_hub.src.models.llama3.llama3_tokenizer import ( + Llama3Tokenizer as Llama3Tokenizer, +) +from keras_hub.src.models.mistral.mistral_tokenizer import ( + MistralTokenizer as MistralTokenizer, +) +from keras_hub.src.models.mixtral.mixtral_tokenizer import ( + MixtralTokenizer as MixtralTokenizer, +) from keras_hub.src.models.moonshine.moonshine_tokenizer import ( - MoonshineTokenizer, + MoonshineTokenizer as MoonshineTokenizer, ) -from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer +from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer as OPTTokenizer from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import ( - PaliGemmaTokenizer, + PaliGemmaTokenizer as PaliGemmaTokenizer, +) +from keras_hub.src.models.parseq.parseq_tokenizer import ( + PARSeqTokenizer as PARSeqTokenizer, +) +from keras_hub.src.models.phi3.phi3_tokenizer import ( + Phi3Tokenizer as Phi3Tokenizer, ) -from keras_hub.src.models.parseq.parseq_tokenizer import PARSeqTokenizer -from keras_hub.src.models.phi3.phi3_tokenizer import Phi3Tokenizer -from keras_hub.src.models.qwen.qwen_tokenizer import QwenTokenizer from keras_hub.src.models.qwen.qwen_tokenizer import ( QwenTokenizer as Qwen2Tokenizer, ) -from keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer import Qwen3MoeTokenizer -from keras_hub.src.models.qwen_moe.qwen_moe_tokenizer import QwenMoeTokenizer -from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer +from keras_hub.src.models.qwen.qwen_tokenizer import ( + QwenTokenizer as QwenTokenizer, +) +from keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer import ( + Qwen3MoeTokenizer as Qwen3MoeTokenizer, +) +from keras_hub.src.models.qwen_moe.qwen_moe_tokenizer import ( + QwenMoeTokenizer as QwenMoeTokenizer, +) +from keras_hub.src.models.roberta.roberta_tokenizer import ( + RobertaTokenizer as RobertaTokenizer, +) from keras_hub.src.models.roformer_v2.roformer_v2_tokenizer import ( - RoformerV2Tokenizer, + RoformerV2Tokenizer as RoformerV2Tokenizer, +) +from keras_hub.src.models.rwkv7.rwkv7_tokenizer import ( + RWKVTokenizer as RWKVTokenizer, +) +from keras_hub.src.models.siglip.siglip_tokenizer import ( + SigLIPTokenizer as SigLIPTokenizer, +) +from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer as T5Tokenizer +from keras_hub.src.models.t5gemma.t5gemma_tokenizer import ( + T5GemmaTokenizer as T5GemmaTokenizer, +) +from keras_hub.src.models.whisper.whisper_tokenizer import ( + WhisperTokenizer as WhisperTokenizer, ) -from keras_hub.src.models.rwkv7.rwkv7_tokenizer import 
RWKVTokenizer -from keras_hub.src.models.siglip.siglip_tokenizer import SigLIPTokenizer -from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer -from keras_hub.src.models.t5gemma.t5gemma_tokenizer import T5GemmaTokenizer -from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer from keras_hub.src.models.xlm_roberta.xlm_roberta_tokenizer import ( - XLMRobertaTokenizer, + XLMRobertaTokenizer as XLMRobertaTokenizer, +) +from keras_hub.src.tokenizers.byte_pair_tokenizer import ( + BytePairTokenizer as BytePairTokenizer, +) +from keras_hub.src.tokenizers.byte_tokenizer import ( + ByteTokenizer as ByteTokenizer, ) -from keras_hub.src.tokenizers.byte_pair_tokenizer import BytePairTokenizer -from keras_hub.src.tokenizers.byte_tokenizer import ByteTokenizer from keras_hub.src.tokenizers.sentence_piece_tokenizer import ( - SentencePieceTokenizer, + SentencePieceTokenizer as SentencePieceTokenizer, ) from keras_hub.src.tokenizers.sentence_piece_tokenizer_trainer import ( - compute_sentence_piece_proto, + compute_sentence_piece_proto as compute_sentence_piece_proto, ) -from keras_hub.src.tokenizers.tokenizer import Tokenizer +from keras_hub.src.tokenizers.tokenizer import Tokenizer as Tokenizer from keras_hub.src.tokenizers.unicode_codepoint_tokenizer import ( - UnicodeCodepointTokenizer, + UnicodeCodepointTokenizer as UnicodeCodepointTokenizer, +) +from keras_hub.src.tokenizers.word_piece_tokenizer import ( + WordPieceTokenizer as WordPieceTokenizer, ) -from keras_hub.src.tokenizers.word_piece_tokenizer import WordPieceTokenizer from keras_hub.src.tokenizers.word_piece_tokenizer_trainer import ( - compute_word_piece_vocabulary, + compute_word_piece_vocabulary as compute_word_piece_vocabulary, ) diff --git a/keras_hub/api/utils/__init__.py b/keras_hub/api/utils/__init__.py index 8ce47790b0..0bd8cb642e 100644 --- a/keras_hub/api/utils/__init__.py +++ b/keras_hub/api/utils/__init__.py @@ -4,10 +4,18 @@ since your modifications would be overwritten. """ -from keras_hub.src.utils.coco.coco_utils import coco_id_to_name -from keras_hub.src.utils.coco.coco_utils import coco_name_to_id +from keras_hub.src.utils.coco.coco_utils import ( + coco_id_to_name as coco_id_to_name, +) +from keras_hub.src.utils.coco.coco_utils import ( + coco_name_to_id as coco_name_to_id, +) +from keras_hub.src.utils.imagenet.imagenet_utils import ( + decode_imagenet_predictions as decode_imagenet_predictions, +) +from keras_hub.src.utils.imagenet.imagenet_utils import ( + imagenet_id_to_name as imagenet_id_to_name, +) from keras_hub.src.utils.imagenet.imagenet_utils import ( - decode_imagenet_predictions, + imagenet_name_to_id as imagenet_name_to_id, ) -from keras_hub.src.utils.imagenet.imagenet_utils import imagenet_id_to_name -from keras_hub.src.utils.imagenet.imagenet_utils import imagenet_name_to_id diff --git a/keras_hub/src/models/rwkv7/rwkv7_backbone_test.py b/keras_hub/src/models/rwkv7/rwkv7_backbone_test.py new file mode 100644 index 0000000000..e061c0e3e6 --- /dev/null +++ b/keras_hub/src/models/rwkv7/rwkv7_backbone_test.py @@ -0,0 +1,37 @@ +from keras import ops + +from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone +from keras_hub.src.tests.test_case import TestCase + + +class RWKV7BackboneTest(TestCase): + def setUp(self): + """ + Set up the test case with default arguments and input data. 
+ """ + self.init_kwargs = { + "vocabulary_size": 10, + "hidden_size": 16, + "num_layers": 2, + "head_size": 4, + "intermediate_dim": 32, + "gate_lora": 32, + "mv_lora": 16, + "aaa_lora": 16, + "decay_lora": 16, + } + self.input_data = ops.ones((2, 5), dtype="int32") + self.backbone = RWKV7Backbone(**self.init_kwargs) + + def test_backbone_basics(self): + """ + Test basic functionality of the RWKV7 backbone. + """ + y = self.backbone(self.input_data) + self.assertEqual(y.shape, (2, 5, 10)) + + def test_num_parameters(self): + """ + Test that the model has the expected number of parameters. + """ + self.assertEqual(self.backbone.count_params(), 10208) diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py b/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py index a57d1d1773..c7a33c1fb9 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py +++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm.py @@ -36,10 +36,10 @@ class RWKV7CausalLM(CausalLM): # Initialize the tokenizer and load assets from a local path. tokenizer = RWKVTokenizer() tokenizer.load_assets(rwkv_path) - + # Create a preprocessor with a sequence length of 8. preprocessor = RWKV7CausalLMPreprocessor(tokenizer, sequence_length=8) - + # Initialize the model with a backbone and preprocessor. causal_lm = RWKV7CausalLM(backbone, preprocessor) @@ -59,7 +59,7 @@ class RWKV7CausalLM(CausalLM): def __init__(self, backbone, preprocessor=None, **kwargs): """Initialize the RWKV-7 causal language model. - + Args: backbone: The backbone model. preprocessor: The preprocessor for tokenization. @@ -87,7 +87,7 @@ def call_with_cache( `call_with_cache` adds an additional forward pass for the model for autoregressive inference. Unlike calling the model directly, this method - allows caching previous state Tensors in RWKV layers, and avoids + allows caching previous state Tensors in RWKV layers, and avoids recomputing the outputs of seen tokens. Args: @@ -249,4 +249,4 @@ def next(prompt, cache, index): return { "token_ids": token_ids, "padding_mask": padding_mask, - } \ No newline at end of file + } diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py index 6187a07f35..0071cda60b 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py +++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py @@ -44,13 +44,13 @@ class RWKV7CausalLMPreprocessor(CausalLMPreprocessor): # Initialize the tokenizer and load assets from a local path. tokenizer = RWKVTokenizer() tokenizer.load_assets(rwkv_path) - + # Create a preprocessor with a sequence length of 8. preprocessor = RWKV7CausalLMPreprocessor(tokenizer, sequence_length=8) - + # Tokenize and pack a batch of sentences. preprocessor(["Bubble sort\n```python", "Hello World\n```python\n"]) - + # Preprocess inputs for generation with a maximum generation length of 16. preprocessor.generate_preprocess( ["Bubble sort\n```python", "Hello World\n```python\n"], 16 @@ -91,6 +91,7 @@ class RWKV7CausalLMPreprocessor(CausalLMPreprocessor): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=torch.int32)} """ + backbone_cls = RWKV7Backbone tokenizer_cls = RWKVTokenizer @@ -101,7 +102,7 @@ def __init__( **kwargs, ): """Initialize the preprocessor. - + Args: tokenizer: The tokenizer to use. add_start_token: Whether to add start token. @@ -119,19 +120,26 @@ def call( sequence_length=None, ): """Preprocess the input for training. - + Args: x: Input text data. y: Target data (optional). sample_weight: Sample weights (optional). 
sequence_length: Desired sequence length. - + Returns: Preprocessed data tuple (x, y, sample_weight). """ + if isinstance(x, str): + x = [x] sequence_length = sequence_length or self.sequence_length # Pad length to multiples of 16 to meet kernel requirements - sequence_length = sequence_length + (16 - sequence_length % 16) + if sequence_length is None: + raise (ValueError("`sequence_length` must be specified.")) + if (sequence_length - 1) % 16 != 0: + sequence_length = sequence_length + ( + 16 - (sequence_length - 1) % 16 + ) x = self.tokenizer(x) token_ids, padding_mask = self.packer( @@ -158,28 +166,35 @@ def build(self, input_shape): def generate_preprocess( self, x, - sequence_length=None, + sequence_length, ): """Preprocess input for generation. - + Args: x: Input text data. sequence_length: Maximum generation length. - + Returns: Dictionary with preprocessed inputs for generation. """ + if isinstance(x, str): + x = [x] + if not self.built: self.build(None) # Align with Keras API # Input sequence_length is the maximum generation length # While self.sequence_length corresponds to the prefill max length generate_length = sequence_length + if sequence_length is None: + raise (ValueError("`sequence_length` must be specified.")) sequence_length = self.sequence_length # Pad length to multiples of 16 to meet kernel requirements - sequence_length = sequence_length + (16 - sequence_length % 16) - generate_length = generate_length + (16 - generate_length % 16) + if sequence_length % 16 != 0: + sequence_length = sequence_length + (16 - sequence_length % 16) + if generate_length % 16 != 0: + generate_length = generate_length + (16 - generate_length % 16) x = [t[-sequence_length:] for t in self.tokenizer(x)] y = ops.zeros((len(x), generate_length), "int32") @@ -206,14 +221,14 @@ def generate_postprocess( x, ): """Convert integer token output to strings for generation. - + This method reverses `generate_preprocess()`, by first removing all padding and start/end tokens, and then converting the integer sequence back to a string. - + Args: x: Dictionary containing token_ids and padding_mask. - + Returns: Detokenized string output. 
""" diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor_test.py b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor_test.py new file mode 100644 index 0000000000..a2648b9c4a --- /dev/null +++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor_test.py @@ -0,0 +1,98 @@ +import numpy as np + +from keras_hub.src.models.rwkv7.rwkv7_causal_lm_preprocessor import ( + RWKV7CausalLMPreprocessor, +) +from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer +from keras_hub.src.tests.test_case import TestCase + + +class RWKV7CausalLMPreprocessorTest(TestCase): + def setUp(self): + self.tokenizer = RWKVTokenizer( + ["1 ' ' 1", "2 '\\n' 1", "3 'the' 3", "4 'hello' 5", "5 'world' 5"] + ) + self.preprocessor = RWKV7CausalLMPreprocessor( + tokenizer=self.tokenizer, + sequence_length=15, + ) + + def test_preprocessor_basics(self): + result = self.preprocessor(x=["hello world hello world hello world"]) + self.assertAllEqual( + result[0], [[0, 0, 0, 0, 0, 0, 4, 1, 5, 1, 4, 1, 5, 1, 4, 1]] + ) + self.assertAllEqual( + result[1], [[0, 0, 0, 0, 0, 4, 1, 5, 1, 4, 1, 5, 1, 4, 1, 5]] + ) + self.assertAllEqual( + result[2], + [ + [ + False, + False, + False, + False, + False, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + ] + ], + ) + + def test_generate_preprocess(self): + result = self.preprocessor.generate_preprocess( + ["hello world hello world hello world"], 16 + ) + self.assertAllEqual( + result["token_ids"], + [[0, 0, 0, 0, 0, 0, 4, 1, 5, 1, 4, 1, 5, 1, 4, 1]], + ) + self.assertAllEqual( + result["padding_mask"], + [ + [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] + ], + ) + self.assertAllEqual( + result["predict_token_ids"], + [[5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], + ) + + def test_generate_postprocess(self): + input_data = { + "token_ids": np.array( + [[3, 2, 4, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]] + ), + "padding_mask": np.array( + [[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]] + ), + } + result = self.preprocessor.generate_postprocess(input_data) + self.assertEqual(result, ["the\nhellothe"]) diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_test.py b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_test.py new file mode 100644 index 0000000000..ed84ef8205 --- /dev/null +++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_test.py @@ -0,0 +1,92 @@ +from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone +from keras_hub.src.models.rwkv7.rwkv7_causal_lm import RWKV7CausalLM +from keras_hub.src.models.rwkv7.rwkv7_causal_lm_preprocessor import ( + RWKV7CausalLMPreprocessor, +) +from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer +from keras_hub.src.tests.test_case import TestCase + + +class RWKV7CausalLMTest(TestCase): + def setUp(self): + """ + Set up the test case with vocabulary, merges, preprocessor, backbone, + and other initialization parameters. 
+        """
+        # Create a small vocabulary for testing
+        self.vocab = [
+            "0 ' ' 1",
+            "1 '\\n' 1",
+            "2 'the' 3",
+            "3 'hello' 5",
+            "4 'world' 5",
+            "5 'python' 6",
+        ]
+
+        # Initialize tokenizer with test vocabulary
+        self.tokenizer = RWKVTokenizer(vocabulary=self.vocab)
+
+        # Create preprocessor with sequence length of 16
+        self.preprocessor = RWKV7CausalLMPreprocessor(
+            tokenizer=self.tokenizer,
+            sequence_length=16,
+        )
+
+        # Create a small backbone for testing
+        self.backbone = RWKV7Backbone(
+            vocabulary_size=self.preprocessor.tokenizer.vocabulary_size() + 1,
+            hidden_size=16,
+            num_layers=2,
+            head_size=4,
+            intermediate_dim=32,
+            gate_lora=8,
+            mv_lora=4,
+            aaa_lora=4,
+            decay_lora=4,
+        )
+
+        # Initialize parameters for the causal LM
+        self.init_kwargs = {
+            "preprocessor": self.preprocessor,
+            "backbone": self.backbone,
+        }
+
+        self.causal_lm = RWKV7CausalLM(self.backbone, self.preprocessor)
+        self.causal_lm.compile(sampler="greedy")
+
+    def test_generate(self):
+        """
+        Test text generation functionality.
+        """
+
+        prompt = ["hello world"]
+        output = self.causal_lm.generate(prompt, 16)
+        self.assertTrue(isinstance(output[0], str))
+        self.assertTrue(isinstance(output, list))
+
+        prompt = "hello world"
+        output = self.causal_lm.generate(prompt, 16)
+        self.assertTrue(isinstance(output, str))
+
+    def test_generate_strip_prompt(self):
+        """
+        Test that generated text can strip the prompt from output.
+        """
+        prompt = ["hello world"]
+        output = self.causal_lm.generate(prompt, 16, strip_prompt=True)
+        self.assertFalse(output[0].startswith(prompt[0]))
+
+    def test_generate_compilation(self):
+        """
+        Test that the generate function compiles correctly and
+        reuses compiled functions.
+        """
+
+        self.causal_lm.generate(["hello world"], 16)
+        first_fn = self.causal_lm.generate_function
+        self.causal_lm.generate(["hello world"], 16)
+        second_fn = self.causal_lm.generate_function
+        self.assertEqual(first_fn, second_fn)
+
+        self.causal_lm.compile(sampler="greedy")
+        self.assertIsNone(self.causal_lm.generate_function)
diff --git a/keras_hub/src/models/rwkv7/rwkv7_tokenizer_test.py b/keras_hub/src/models/rwkv7/rwkv7_tokenizer_test.py
new file mode 100644
index 0000000000..f49b39ccf5
--- /dev/null
+++ b/keras_hub/src/models/rwkv7/rwkv7_tokenizer_test.py
@@ -0,0 +1,25 @@
+from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer
+from keras_hub.src.tests.test_case import TestCase
+
+
+class RWKV7TokenizerTest(TestCase):
+    def setUp(self):
+        self.tokenizer = RWKVTokenizer(
+            ["1 ' ' 1", "2 '\\n' 1", "3 'the' 3", "4 'hello' 5", "5 'world' 5"]
+        )
+
+    def test_tokenizer_basics(self):
+        result = self.tokenizer("hello world")
+        self.assertAllEqual(result, [[4, 1, 5]])
+
+    def test_vocabulary_size(self):
+        self.assertEqual(self.tokenizer.vocabulary_size(), 5)
+
+    def test_tokenize_and_detokenize(self):
+        # Test detokenization
+        text = self.tokenizer.detokenize([[4, 1, 5]])
+        self.assertEqual(text[0], "hello world")
+
+    def test_special_tokens(self):
+        self.assertEqual(self.tokenizer.pad_token_id, 0)
+        self.assertEqual(self.tokenizer.end_token_id, 2)

From 897a64b56e69319b4dad5ea8f83fa6d0c07e5286 Mon Sep 17 00:00:00 2001
From: pass_lin <935499957@qq.com>
Date: Wed, 8 Oct 2025 11:55:39 +0800
Subject: [PATCH 09/10] fix test

---
 .../src/models/rwkv7/rwkv7_causal_lm_test.py  | 28 +++++++++----------
 .../src/models/rwkv7/rwkv7_tokenizer_test.py  |  2 +-
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_test.py 
b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_test.py index ed84ef8205..215fda095d 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_causal_lm_test.py +++ b/keras_hub/src/models/rwkv7/rwkv7_causal_lm_test.py @@ -34,7 +34,7 @@ def setUp(self): # Create a small backbone for testing self.backbone = RWKV7Backbone( - vocabulary_size=self.preprocessor.tokenizer.vocabulary_size() + 1, + vocabulary_size=5, hidden_size=16, num_layers=2, head_size=4, @@ -51,21 +51,18 @@ def setUp(self): "backbone": self.backbone, } - self.causal_lm = RWKV7CausalLM(self.backbone, self.preprocessor) - self.causal_lm.compile(sampler="greedy") - def test_generate(self): """ Test text generation functionality. """ - + causal_lm = RWKV7CausalLM(self.backbone, self.preprocessor) prompt = ["hello world"] - output = self.causal_lm.generate(prompt, 16) + output = causal_lm.generate(prompt, 16) self.assertTrue(isinstance(output[0], str)) self.assertTrue(isinstance(output, list)) prompt = "hello world" - output = self.causal_lm.generate(prompt, 16) + output = causal_lm.generate(prompt, 16) self.assertTrue(isinstance(output, str)) def test_generate_strip_prompt(self): @@ -73,7 +70,8 @@ def test_generate_strip_prompt(self): Test that generated text can strip the prompt from output. """ prompt = ["hello world"] - output = self.causal_lm.generate(prompt, 16, strip_prompt=True) + causal_lm = RWKV7CausalLM(self.backbone, self.preprocessor) + output = causal_lm.generate(prompt, 16, strip_prompt=True) self.assertFalse(output[0].startswith(prompt[0])) def test_generate_compilation(self): @@ -81,12 +79,12 @@ def test_generate_compilation(self): Test that the generate function compiles correctly and reuses compiled functions. """ - - self.causal_lm.generate(["hello world"], 16) - first_fn = self.causal_lm.generate_function - self.causal_lm.generate(["hello world"], 16) - second_fn = self.causal_lm.generate_function + causal_lm = RWKV7CausalLM(self.backbone, self.preprocessor) + causal_lm.generate(["hello world"], 16) + first_fn = causal_lm.generate_function + causal_lm.generate(["hello world"], 16) + second_fn = causal_lm.generate_function self.assertEqual(first_fn, second_fn) - self.causal_lm.compile(sampler="greedy") - self.assertIsNone(self.causal_lm.generate_function) + causal_lm.compile(sampler="greedy") + self.assertIsNone(causal_lm.generate_function) diff --git a/keras_hub/src/models/rwkv7/rwkv7_tokenizer_test.py b/keras_hub/src/models/rwkv7/rwkv7_tokenizer_test.py index f49b39ccf5..69f76a2366 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_tokenizer_test.py +++ b/keras_hub/src/models/rwkv7/rwkv7_tokenizer_test.py @@ -10,7 +10,7 @@ def setUp(self): def test_tokenizer_basics(self): result = self.tokenizer("hello world") - self.assertAllEqual(result, [[4, 1, 5]]) + self.assertAllEqual(result, [4, 1, 5]) def test_vocabulary_size(self): self.assertEqual(self.tokenizer.vocabulary_size(), 5) From ff11f946cfbf9d1883f575155f5f347d002b7da3 Mon Sep 17 00:00:00 2001 From: pass_lin <935499957@qq.com> Date: Wed, 8 Oct 2025 12:35:08 +0800 Subject: [PATCH 10/10] fix doc --- keras_hub/src/models/rwkv7/rwkv7_tokenizer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py index bc2069a604..ef11a059e8 100644 --- a/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py +++ b/keras_hub/src/models/rwkv7/rwkv7_tokenizer.py @@ -212,10 +212,13 @@ class RWKVTokenizer(tokenizer.Tokenizer): or string type. 
Examples: + ```python + vocab = ["0 ' ' 1", "1 '\\n' 1", "2 'the' 3", "3 'hello' 5"] + tok = RWKVTokenizer(vocabulary=vocab) + tok("hello the") + ``` - >>> vocab = ["0 ' ' 1", "1 '\\n' 1", "2 'the' 3", "3 'hello' 5"] - >>> tok = RWKVTokenizer(vocabulary=vocab) - >>> tok("hello the") + Output: [3, 0, 2] """
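
Usage sketch: the snippet below chains the pieces this series adds (tokenizer, preprocessor, backbone, causal LM), mirroring the toy fixtures in the test files above. The vocabulary, layer sizes, and the `vocabulary_size` sizing are illustrative assumptions copied from the tests, not a recommended configuration.

```python
from keras_hub.src.models.rwkv7.rwkv7_backbone import RWKV7Backbone
from keras_hub.src.models.rwkv7.rwkv7_causal_lm import RWKV7CausalLM
from keras_hub.src.models.rwkv7.rwkv7_causal_lm_preprocessor import (
    RWKV7CausalLMPreprocessor,
)
from keras_hub.src.models.rwkv7.rwkv7_tokenizer import RWKVTokenizer

# Toy vocabulary in the RWKV format: "<id> '<token>' <encoded length>".
vocab = ["0 ' ' 1", "1 '\\n' 1", "2 'the' 3", "3 'hello' 5", "4 'world' 5"]
tokenizer = RWKVTokenizer(vocabulary=vocab)

# Sequence lengths are padded up to a multiple of 16 internally (a kernel
# requirement), so sequence_length need not be a multiple of 16 here.
preprocessor = RWKV7CausalLMPreprocessor(tokenizer, sequence_length=16)

backbone = RWKV7Backbone(
    # Illustrative sizing, following the test fixtures.
    vocabulary_size=tokenizer.vocabulary_size() + 1,
    hidden_size=16,
    num_layers=2,
    head_size=4,
    intermediate_dim=32,
    gate_lora=8,
    mv_lora=4,
    aaa_lora=4,
    decay_lora=4,
)

causal_lm = RWKV7CausalLM(backbone, preprocessor)
causal_lm.compile(sampler="greedy")

# The second argument is the maximum generation length. A string prompt
# returns a string; a list of prompts returns a list.
print(causal_lm.generate("hello world", 16))
```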