Sourcery Starbot ⭐ refactored guyrosin/temporal_attention#2
SourceryAI wants to merge 1 commit into guyrosin:main from
Conversation
```diff
     )
-    dataset = DatasetDict({"train": train_dataset, "validation": test_dataset})
-    return dataset
+    return DatasetDict({"train": train_dataset, "validation": test_dataset})
```
Function load_train_test_datasets refactored with the following changes:
- Inline variable that is immediately returned (`inline-immediately-returned-variable`)
```diff
-    exclude_similar_sentences = True if corpus_name.startswith("liverpool") else False
+    exclude_similar_sentences = bool(corpus_name.startswith("liverpool"))
```
Function split_temporal_dataset_files refactored with the following changes:
- Simplify boolean if expression (`boolean-if-exp-identity`)
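As a side note, `str.startswith` already returns a `bool`, so even the `bool()` wrapper in the suggested code is strictly redundant. A minimal standalone sketch of the pattern (the corpus name here is a made-up value):

```python
corpus_name = "liverpool_2013"  # hypothetical example value

# Before: redundant conditional expression around a boolean-valued call
exclude_similar_sentences = True if corpus_name.startswith("liverpool") else False

# After: startswith() already returns a bool, so its result can be used directly
exclude_similar_sentences = corpus_name.startswith("liverpool")

assert exclude_similar_sentences is True
```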
| logger.info(f"Finding relevant sentences in the corpus...") | ||
| logger.info("Finding relevant sentences in the corpus...") |
Function find_sentences_of_words refactored with the following changes:
- Replace f-string with no interpolated values with string (`remove-redundant-fstring`)
```diff
-    kwargs.update(additional_kwargs)
-    config = AutoConfig.from_pretrained(
+    kwargs |= additional_kwargs
+    return AutoConfig.from_pretrained(
         model_args.model_name_or_path, cache_dir=model_args.cache_dir, **kwargs
     )
-    return config
```
Function _load_auto_config refactored with the following changes:
- Merge dictionary updates via the union operator (`dict-assign-update-to-union`)
- Inline variable that is immediately returned (`inline-immediately-returned-variable`)
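For context, the in-place dict union operator comes from PEP 584 and requires Python 3.9+; on older interpreters `dict.update` is still needed. A minimal sketch with made-up option names:

```python
# Base kwargs plus caller-supplied overrides (key names are illustrative)
kwargs = {"hidden_dropout_prob": 0.1, "num_labels": 2}
additional_kwargs = {"num_labels": 5}

# Before (any Python 3): merge in place with update()
kwargs.update(additional_kwargs)

# After (Python 3.9+): the same merge written as an augmented assignment
kwargs |= additional_kwargs

assert kwargs["num_labels"] == 5
```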
| f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, " | ||
| + f"distributed training: {bool(training_args.local_rank != -1)}" | ||
| ( | ||
| f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, " | ||
| + f"distributed training: {training_args.local_rank != -1}" | ||
| ) |
Function init_run refactored with the following changes:
- Remove unnecessary casts to int, str, float or bool (`remove-unnecessary-cast`)
```diff
-        # For backward compatibility, allow to try to setup 'max_len_sentences_pair'.
         if (
-            value == self.model_max_length - self.num_special_tokens_to_add(pair=True)
-            and self.verbose
+            value
+            != self.model_max_length - self.num_special_tokens_to_add(pair=True)
+            or not self.verbose
         ):
-            if not self.deprecation_warnings.get("max_len_sentences_pair", False):
-                logger.warning(
-                    "Setting 'max_len_sentences_pair' is now deprecated. "
-                    "This value is automatically set up."
-                )
-            self.deprecation_warnings["max_len_sentences_pair"] = True
-        else:
             raise ValueError(
                 "Setting 'max_len_sentences_pair' is now deprecated. "
                 "This value is automatically set up."
             )
+        if not self.deprecation_warnings.get("max_len_sentences_pair", False):
+            logger.warning(
+                "Setting 'max_len_sentences_pair' is now deprecated. "
+                "This value is automatically set up."
+            )
+        self.deprecation_warnings["max_len_sentences_pair"] = True
```
Function TempoPreTrainedTokenizerBase.max_len_sentences_pair refactored with the following changes:
- Swap if/else branches (`swap-if-else-branches`)
- Remove unnecessary else after guard condition (`remove-unnecessary-else`)

This removes the following comments (why?):

`# For backward compatibility, allow to try to setup 'max_len_sentences_pair'.`
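Taken together, the two changes produce the usual guard-clause shape: invert the condition, raise early, and de-indent the happy path. A generic sketch of that shape (not the tokenizer code itself):

```python
def set_deprecated_option(value, expected, verbose=True):
    # Before-style: happy path nested under the if, error path in the else
    #     if value == expected and verbose:
    #         warn_and_record()
    #     else:
    #         raise ValueError("unexpected value")

    # After-style: guard clause first, main logic left-aligned
    if value != expected or not verbose:
        raise ValueError("unexpected value")
    return f"accepted {value}"

print(set_deprecated_option(3, 3))  # accepted 3
```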
| "Model name '{}' not found in model shortcut name list ({}). " | ||
| "Assuming '{}' is a path, a model identifier, or url to a directory containing tokenizer files.".format( | ||
| pretrained_model_name_or_path, | ||
| ", ".join(s3_models), | ||
| pretrained_model_name_or_path, | ||
| ) | ||
| f"""Model name '{pretrained_model_name_or_path}' not found in model shortcut name list ({", ".join(s3_models)}). Assuming '{pretrained_model_name_or_path}' is a path, a model identifier, or url to a directory containing tokenizer files.""" |
Function TempoPreTrainedTokenizerBase.from_pretrained refactored with the following changes:
- Replace call to format with f-string (`use-fstring-for-formatting`)
- Swap if/else branches (`swap-if-else-branches`)
- Remove unnecessary else after guard condition (`remove-unnecessary-else`)
- Simplify sequence length comparison (`simplify-len-comparison`)
| "It may result in unexpected tokenization. \n" | ||
| f"The tokenizer class you load from this checkpoint is '{config_tokenizer_class}'. \n" | ||
| f"The class this function is called from is '{cls.__name__}'." | ||
| ) |
Function TempoPreTrainedTokenizerBase._from_pretrained refactored with the following changes:
- Merge nested if conditions (`merge-nested-ifs`)
- Replace list(), dict() or set() with comprehension (`collection-builtin-to-comprehension`)
- Remove unnecessary casts to int, str, float or bool (`remove-unnecessary-cast`)
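The visible fragment shows only the warning text; the `merge-nested-ifs` part of the change follows this general pattern (the variable values below are illustrative, not the checkpoint code):

```python
config_tokenizer_class = "BertTokenizer"  # hypothetical values
calling_class = "TempoBertTokenizerFast"
verbose = True

# Before: two nested if statements
if config_tokenizer_class is not None:
    if config_tokenizer_class != calling_class and verbose:
        print("The tokenizer class you load from this checkpoint differs.")

# After: a single condition joined with `and`
if (
    config_tokenizer_class is not None
    and config_tokenizer_class != calling_class
    and verbose
):
    print("The tokenizer class you load from this checkpoint differs.")
```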
| (filename_prefix + "-" if filename_prefix else "") | ||
| + SPECIAL_TOKENS_MAP_FILE, | ||
| ( | ||
| (f"{filename_prefix}-" if filename_prefix else "") | ||
| + SPECIAL_TOKENS_MAP_FILE | ||
| ), | ||
| ) | ||
| tokenizer_config_file = os.path.join( | ||
| save_directory, | ||
| (filename_prefix + "-" if filename_prefix else "") + TOKENIZER_CONFIG_FILE, | ||
| (f"{filename_prefix}-" if filename_prefix else "") | ||
| + TOKENIZER_CONFIG_FILE, |
Function TempoPreTrainedTokenizerBase.save_pretrained refactored with the following changes:
- Use f-string instead of string concatenation [×2] (`use-fstring-for-concatenation`)
- Replace list(), dict() or set() with comprehension (`collection-builtin-to-comprehension`)
| (filename_prefix + "-" if filename_prefix else "") + ADDED_TOKENS_FILE, | ||
| (f"{filename_prefix}-" if filename_prefix else "") + ADDED_TOKENS_FILE, | ||
| ) | ||
| added_vocab = self.get_added_vocab() | ||
| if added_vocab: | ||
| if added_vocab := self.get_added_vocab(): |
Function TempoPreTrainedTokenizerBase._save_pretrained refactored with the following changes:
- Use named expression to simplify assignment and conditional (`use-named-expression`)
- Use f-string instead of string concatenation (`use-fstring-for-concatenation`)
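The named-expression (walrus) form requires Python 3.8+. A minimal before/after sketch using a stand-in for `get_added_vocab`:

```python
def get_added_vocab():
    # Stand-in for the tokenizer method; returns a possibly empty dict
    return {"[TIME_2020]": 30522}

# Before: separate assignment and truthiness test
added_vocab = get_added_vocab()
if added_vocab:
    print(f"saving {len(added_vocab)} added tokens")

# After (Python 3.8+): assign and test in a single expression
if added_vocab := get_added_vocab():
    print(f"saving {len(added_vocab)} added tokens")
```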
```diff
-            if max_length is not None:
-                if truncation is False or truncation == "do_not_truncate":
-                    warnings.warn(
-                        "`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
-                        "To pad to max length, use `padding='max_length'`."
-                    )
+            if max_length is not None and (
+                truncation is False or truncation == "do_not_truncate"
+            ):
+                warnings.warn(
+                    "`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
+                    "To pad to max length, use `padding='max_length'`."
+                )
```
Function TempoPreTrainedTokenizerBase._get_padding_truncation_strategies refactored with the following changes:
- Merge nested if conditions (`merge-nested-ifs`)
- Remove redundant conditional [×3] (`remove-redundant-if`)
```diff
-            inputs = dict((k, v[i]) for k, v in encoded_inputs.items())
+            inputs = {k: v[i] for k, v in encoded_inputs.items()}
```
Function TempoPreTrainedTokenizerBase.pad refactored with the following changes:
- Replace list(), dict() or set() with comprehension (`collection-builtin-to-comprehension`)
```diff
-        if token_ids_1 is None:
-            return token_ids_0
-        return token_ids_0 + token_ids_1
+        return token_ids_0 if token_ids_1 is None else token_ids_0 + token_ids_1
```
Function TempoPreTrainedTokenizerBase.build_inputs_with_special_tokens refactored with the following changes:
- Lift code into else after jump in control flow (`reintroduce-else`)
- Replace if statement with if expression (`assign-if-exp`)
```diff
-        pair = bool(pair_ids is not None)
+        pair = pair_ids is not None
```
Function TempoPreTrainedTokenizerBase.prepare_for_model refactored with the following changes:
- Remove unnecessary casts to int, str, float or bool (`remove-unnecessary-cast`)
- Swap positions of nested conditionals [×2] (`swap-nested-ifs`)
- Hoist nested repeated code outside conditional statements [×2] (`hoist-similar-statement-from-if`)
- Split conditional into multiple branches [×2] (`split-or-ifs`)
- Merge duplicate blocks in conditional [×2] (`merge-duplicate-blocks`)
- Inline variable that is immediately returned (`inline-immediately-returned-variable`)
- Remove redundant conditional [×2] (`remove-redundant-if`)
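The fragment only shows the removed `bool()` cast; of the other listed transforms, hoisting repeated code out of a conditional is the least obvious, and it looks roughly like this (a generic sketch with stand-in names, not the actual `prepare_for_model` body):

```python
def truncate(ids, n):
    # Stand-in for sequence truncation: drop the last n ids
    return ids[:-n] if n else ids

ids, num_overflow = [1, 2, 3, 4, 5], 2

# Before: the identical truncate() call is duplicated in both branches
#     if num_overflow > 0:
#         ids = truncate(ids, num_overflow)
#         overflowing = ids[-2:]
#     else:
#         ids = truncate(ids, num_overflow)
#         overflowing = []

# After: the repeated statement is hoisted above the conditional
ids = truncate(ids, num_overflow)
overflowing = ids[-2:] if num_overflow > 0 else []
```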
```diff
-            error_msg = (
-                error_msg + "Please select another truncation strategy than "
-                f"{truncation_strategy}, for instance 'longest_first' or 'only_second'."
-            )
+            error_msg = f"{error_msg}Please select another truncation strategy than {truncation_strategy}, for instance 'longest_first' or 'only_second'."
```
Function TempoPreTrainedTokenizerBase.truncate_sequences refactored with the following changes:
- Use f-string instead of string concatenation (`use-fstring-for-concatenation`)
```diff
-            clean_text = self.clean_up_tokenization(text)
-            return clean_text
+            return self.clean_up_tokenization(text)
```
Function TempoPreTrainedTokenizerFast._decode refactored with the following changes:
- Inline variable that is immediately returned (`inline-immediately-returned-variable`)
| (filename_prefix + "-" if filename_prefix else "") + ADDED_TOKENS_FILE, | ||
| (f"{filename_prefix}-" if filename_prefix else "") | ||
| + ADDED_TOKENS_FILE, | ||
| ) | ||
| added_vocab = self.get_added_vocab() | ||
| if added_vocab: | ||
| if added_vocab := self.get_added_vocab(): |
Function TempoPreTrainedTokenizerFast._save_pretrained refactored with the following changes:
- Use named expression to simplify assignment and conditional (`use-named-expression`)
- Use f-string instead of string concatenation [×2] (`use-fstring-for-concatenation`)
```diff
-    if data_args.line_by_line:
-        tokenized_dataset = tokenize_dataset_line_by_line(
+    return (
+        tokenize_dataset_line_by_line(
```
Function load_data refactored with the following changes:
- Replace if statement with if expression (`assign-if-exp`)
- Inline variable that is immediately returned (`inline-immediately-returned-variable`)
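The diff cuts off mid-expression; the combined effect of the two changes is to return a conditional expression directly. A sketch with stand-in tokenization helpers (the real functions take more arguments):

```python
def tokenize_dataset_line_by_line(dataset):
    return f"line-by-line({dataset})"  # stand-in

def tokenize_dataset_concatenated(dataset):
    return f"concatenated({dataset})"  # stand-in

def load_data(dataset, line_by_line):
    # Before:
    #     if line_by_line:
    #         tokenized_dataset = tokenize_dataset_line_by_line(dataset)
    #     else:
    #         tokenized_dataset = tokenize_dataset_concatenated(dataset)
    #     return tokenized_dataset

    # After: the conditional expression is returned directly
    return (
        tokenize_dataset_line_by_line(dataset)
        if line_by_line
        else tokenize_dataset_concatenated(dataset)
    )

print(load_data("corpus", line_by_line=True))
```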
| "configuration_tempobert": [ | ||
| "TempoBertConfig", | ||
| "configuration_tempobert": ["TempoBertConfig"], | ||
| "tokenization_tempobert_fast": ["TempoBertTokenizerFast"], | ||
| "modeling_tempobert": [ | ||
| "TempoBertForMaskedLM", | ||
| "TempoBertModel", | ||
| "TempoBertForPreTraining", | ||
| "TempoBertForSequenceClassification", | ||
| "TempoBertForTokenClassification", | ||
| ], | ||
| } | ||
|
|
||
| _import_structure["tokenization_tempobert_fast"] = ["TempoBertTokenizerFast"] | ||
|
|
||
| _import_structure["modeling_tempobert"] = [ | ||
| "TempoBertForMaskedLM", | ||
| "TempoBertModel", | ||
| "TempoBertForPreTraining", | ||
| "TempoBertForSequenceClassification", | ||
| "TempoBertForTokenClassification", | ||
| ] |
Lines 6-19 refactored with the following changes:
- Merge dictionary assignment with declaration [×2] (`merge-dict-assign`)
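`merge-dict-assign` simply folds later key assignments into the dict literal itself; a generic sketch (shortened, illustrative keys):

```python
# Before: keys assigned one by one after the declaration
_imports = {"configuration": ["Config"]}
_imports["tokenization"] = ["TokenizerFast"]
_imports["modeling"] = ["ForMaskedLM", "Model"]

# After: everything declared in a single literal
_imports = {
    "configuration": ["Config"],
    "tokenization": ["TokenizerFast"],
    "modeling": ["ForMaskedLM", "Model"],
}
```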
```diff
-        SPECIAL_TIMES_COUNT = 2  # NOTE: hardcoded (see TempoSpecialTokensMixin)
         if "attention" in self.time_embedding_type:
+            SPECIAL_TIMES_COUNT = 2  # NOTE: hardcoded (see TempoSpecialTokensMixin)
```
Function TempoBertEmbeddings.init_time_embeddings refactored with the following changes:
- Move assignments closer to their usage (`move-assign`)
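`move-assign` just delays defining the constant until the branch that actually reads it. Sketched generically (the surrounding embedding setup is omitted, and the added count of 10 is a made-up value):

```python
time_embedding_type = "temporal_attention"  # hypothetical value

# Before: the constant is assigned even when the branch is not taken
#     SPECIAL_TIMES_COUNT = 2  # NOTE: hardcoded (see TempoSpecialTokensMixin)
#     if "attention" in time_embedding_type:
#         times_vocab_size = 10 + SPECIAL_TIMES_COUNT

# After: the assignment sits next to its only use
if "attention" in time_embedding_type:
    SPECIAL_TIMES_COUNT = 2  # NOTE: hardcoded (see TempoSpecialTokensMixin)
    times_vocab_size = 10 + SPECIAL_TIMES_COUNT
```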
Thanks for starring sourcery-ai/sourcery ✨ 🌟 ✨
Here's your pull request refactoring your most popular Python repo.
If you want Sourcery to refactor all your Python repos and incoming pull requests, install our bot.
Review changes via command line
To manually merge these changes, make sure you're on the main branch, then run: