Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions brainscore_language/model_helpers/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ def __init__(
self._logger = logging.getLogger(fullname(self))
self.model_id = model_id
self.region_layer_mapping = region_layer_mapping
self.basemodel = (model if model is not None else AutoModelForCausalLM.from_pretrained(self.model_id))
self.basemodel = (model if model is not None else AutoModelForCausalLM.from_pretrained(self.model_id, device_map="auto"))
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
self.basemodel.to(self.device)
self.tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained(self.model_id,
truncation_side='left')
self.current_tokens = None # keep track of current tokens
Expand Down Expand Up @@ -182,6 +181,10 @@ def _tokenize_overflow_aware(self, context, num_previous_context_tokens: int) ->
context_tokens.pop('num_truncated_tokens')
if 'overflow_to_sample_mapping' in context_tokens:
context_tokens.pop('overflow_to_sample_mapping')
if 'token_type_ids' in context_tokens:
context_tokens.pop('token_type_ids')
if self.basemodel.config.is_encoder_decoder:
context_tokens['decoder_input_ids'] = context_tokens['input_ids']
context_tokens.to(self.device)
return context_tokens, num_new_context_tokens

Expand Down
47 changes: 47 additions & 0 deletions brainscore_language/models/llama/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from brainscore_language import model_registry
from brainscore_language import ArtificialSubject
from brainscore_language.model_helpers.huggingface import HuggingfaceSubject
# NOTE(review): AutoModelForCausalLM/AutoTokenizer/LlamaTokenizer are not referenced
# below — presumably imported to fail fast if transformers lacks LLaMA support; confirm.
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer

# layer assignment based on choosing the maximally scoring layer on Pereira2018-encoding

_LANGUAGE_SYSTEM = ArtificialSubject.RecordingTarget.language_system


def _register(identifier: str, model_id: str, layer: str) -> None:
    """Register a lazily-constructed HuggingFace subject under ``identifier``.

    ``model_id`` is the HuggingFace hub id; ``layer`` is the module whose
    activations stand in for the language system. The subject is only built
    when the registry entry is invoked.
    """
    model_registry[identifier] = lambda: HuggingfaceSubject(
        model_id=model_id,
        region_layer_mapping={_LANGUAGE_SYSTEM: layer},
    )


# LLaMA 1 models
_register('llama-7b', 'huggyllama/llama-7b', 'model.layers.29.post_attention_layernorm')
_register('llama-13b', 'huggyllama/llama-13b', 'model.layers.33.post_attention_layernorm')
# the "33b" model is published under the historical "30b" hub name
_register('llama-33b', 'huggyllama/llama-30b', 'model.layers.53.post_attention_layernorm')

# Alpaca models
_register('alpaca-7b', 'chavinlo/alpaca-native', 'model.layers.29.post_attention_layernorm')

# Vicuna models
_register('vicuna-7b', 'lmsys/vicuna-7b-v1.3', 'model.layers.28.post_attention_layernorm')
_register('vicuna-13b', 'lmsys/vicuna-13b-v1.3', 'model.layers.33.post_attention_layernorm')
_register('vicuna-33b', 'lmsys/vicuna-33b-v1.3', 'model.layers.52.post_attention_layernorm')
62 changes: 62 additions & 0 deletions brainscore_language/models/llama/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import numpy as np
import pytest

from brainscore_language import load_model
from brainscore_language.artificial_subject import ArtificialSubject


@pytest.mark.memory_intense
@pytest.mark.parametrize('model_identifier, expected_reading_times', [
    ('llama-7b', [9.724511, 12.605466, 3.32503, 0.09871647, 0.725152, 0.04576033, 0.07947908, 0.08976307]),
    ('llama-13b', [10.53345, 11.900979, 2.576608, 0.09501585, 0.6747948, 0.06707504, 0.07982931, 0.13605802]),
    ('llama-33b', [11.483265, 12.449862, 1.7104287, 0.10519427, 0.9729844, 0.12699145, 0.23386568, 0.15289368]),
    ('alpaca-7b', [3.15336514e+01, 1.61361885e+01, 6.20819473e+00, 3.02336123e-02, 4.87159938e-01, 5.48269460e-03, 1.08295875e-02, 1.63752567e-02]),
    ('vicuna-7b', [1.4193897e+01, 1.4030097e+01, 4.5661983e+00, 1.7538711e-02, 5.8269405e-01, 3.2116382e-03, 8.8979863e-02, 7.2399867e-03]),
    ('vicuna-13b', [5.1001291e+00, 1.1878480e+01, 7.0294745e-02, 2.8342367e-03, 8.7360293e-03, 6.8028755e-03, 5.5397633e-02, 3.3574910e-03]),
    ('vicuna-33b', [4.8655987, 14.37647, 1.5682482, 0.02738321, 0.34660488, 0.04076412, 0.0271305, 0.03512227]),
])
def test_reading_times(model_identifier, expected_reading_times):
    """Per-word reading times on a fixed sentence should match recorded values."""
    subject = load_model(model_identifier)
    subject.start_behavioral_task(task=ArtificialSubject.Task.reading_times)
    words = ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy']
    measured = subject.digest_text(words)['behavior']
    # loose tolerance: values vary slightly across hardware/driver versions
    np.testing.assert_allclose(measured, expected_reading_times, atol=0.01)


@pytest.mark.memory_intense
@pytest.mark.parametrize('model_identifier, expected_next_words', [
    ('llama-7b', ['j', 'the', 'dog']),
    ('llama-13b', ['j', 'the', 'dog']),
    ('llama-33b', ['j', 'the', 'dog']),
    ('alpaca-7b', ['j', 'the', 'dog']),
    ('vicuna-7b', ['j', 'the', 'dog']),
    ('vicuna-13b', ['j', 'the', 'dog']),
    ('vicuna-33b', ['j', 'the', 'dog']),
])
def test_next_word(model_identifier, expected_next_words):
    """Each context chunk should be continued with the recorded next token."""
    subject = load_model(model_identifier)
    subject.start_behavioral_task(task=ArtificialSubject.Task.next_word)
    contexts = ['the quick brown fox', 'jumps over', 'the lazy']
    predictions = subject.digest_text(contexts)['behavior']
    np.testing.assert_array_equal(predictions, expected_next_words)


@pytest.mark.memory_intense
@pytest.mark.parametrize('model_identifier, feature_size', [
    ('llama-7b', 4096),
    ('llama-13b', 5120),
    ('llama-33b', 6656),
    ('alpaca-7b', 4096),
    ('vicuna-7b', 4096),
    ('vicuna-13b', 5120),
    ('vicuna-33b', 6656),
])
def test_neural(model_identifier, feature_size):
    """fMRI-style recordings should have one row per passage and `feature_size` neuroids."""
    passages = ['the quick brown fox', 'jumps over', 'the lazy dog']
    subject = load_model(model_identifier)
    subject.start_neural_recording(recording_target=ArtificialSubject.RecordingTarget.language_system,
                                   recording_type=ArtificialSubject.RecordingType.fMRI)
    recordings = subject.digest_text(passages)['neural']
    # one presentation per passage, stimuli preserved verbatim
    assert len(recordings['presentation']) == len(passages)
    np.testing.assert_array_equal(recordings['stimulus'], passages)
    assert len(recordings['neuroid']) == feature_size
116 changes: 116 additions & 0 deletions brainscore_language/models/t5/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from brainscore_language import model_registry
from brainscore_language import ArtificialSubject
from brainscore_language.model_helpers.huggingface import HuggingfaceSubject
from transformers import AutoModelForSeq2SeqLM

# layer assignment based on choosing the maximally scoring layer on Pereira2018-encoding

_LANGUAGE_SYSTEM = ArtificialSubject.RecordingTarget.language_system


def _register(identifier: str, model_id: str, layer: str) -> None:
    """Register a lazily-built seq2seq (encoder-decoder) subject under ``identifier``.

    The T5 checkpoint is only downloaded/instantiated when the registry entry is
    invoked; ``device_map="auto"`` lets accelerate place the weights.
    ``layer`` names the encoder or decoder block used as the language system.
    """
    model_registry[identifier] = lambda: HuggingfaceSubject(
        model_id=model_id,
        model=AutoModelForSeq2SeqLM.from_pretrained(model_id, device_map="auto"),
        region_layer_mapping={_LANGUAGE_SYSTEM: layer},
    )


# T5 models
_register('t5-small', 'google/t5-v1_1-small', 'decoder.block.6')
_register('t5-base', 'google/t5-v1_1-base', 'encoder.block.9')
_register('t5-large', 'google/t5-v1_1-large', 'encoder.block.17')
_register('t5-xl', 'google/t5-v1_1-xl', 'decoder.block.2')
_register('t5-xxl', 'google/t5-v1_1-xxl', 'decoder.block.0')

# Flan-T5 models
_register('flan-t5-small', 'google/flan-t5-small', 'encoder.block.7')
_register('flan-t5-base', 'google/flan-t5-base', 'decoder.block.7')
_register('flan-t5-large', 'google/flan-t5-large', 'encoder.block.18')
_register('flan-t5-xl', 'google/flan-t5-xl', 'decoder.block.2')
_register('flan-t5-xxl', 'google/flan-t5-xxl', 'decoder.block.0')

# Flan-Alpaca T5 models
_register('flan-alpaca-base', 'declare-lab/flan-alpaca-base', 'encoder.block.11')
_register('flan-alpaca-large', 'declare-lab/flan-alpaca-large', 'encoder.block.19')
_register('flan-alpaca-xl', 'declare-lab/flan-alpaca-xl', 'decoder.block.5')
_register('flan-alpaca-xxl', 'declare-lab/flan-alpaca-xxl', 'decoder.block.0')

# Other models based on T5
_register('flan-gpt4all-xl', 'declare-lab/flan-gpt4all-xl', 'decoder.block.2')
_register('flan-sharegpt-xl', 'declare-lab/flan-sharegpt-xl', 'decoder.block.5')
_register('flan-alpaca-gpt4-xl', 'declare-lab/flan-alpaca-gpt4-xl', 'decoder.block.2')
2 changes: 2 additions & 0 deletions brainscore_language/models/t5/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sentencepiece
protobuf==3.19.4
92 changes: 92 additions & 0 deletions brainscore_language/models/t5/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import numpy as np
import pytest

from brainscore_language import load_model
from brainscore_language.artificial_subject import ArtificialSubject


@pytest.mark.memory_intense
@pytest.mark.parametrize('model_identifier, expected_reading_times', [
    ('t5-small', [25.646585, 23.780153, 23.018826, 22.344381, 11.96658, 27.054287, 10.594951, 13.187043]),
    ('t5-base', [7.7039944e-03, 6.8635613e-02, 3.1093130e+01, 1.2913298e+02, 8.5430244e+01, 1.6261120e+01, 8.2980719e+00, 2.9535002e+01]),
    ('t5-large', [31.604916, 18.852331, 30.816673, 48.99762, 49.006733, 36.088543, 14.189968, 37.781395]),
    ('t5-xl', [5.2831264, 18.823713, 19.249414, 35.212494, 24.10475, 19.929758, 11.064505, 16.397375]),
    ('t5-xxl', [26.934216, 30.064108, 18.61358, 71.8481, 20.456089, 18.108957, 25.52297, 20.845043]),
    ('flan-t5-small', [4.626572, 5.4074254, 2.9690156, 5.98445, 12.027061, 11.096782, 16.912296, 14.794151]),
    ('flan-t5-base', [1.8610231, 1.5091983, 2.3265584, 2.5798035, 0.9352376, 2.594869, 3.4819074, 2.7790558]),
    ('flan-t5-large', [2.2994747, 4.1134634, 1.6111257, 10.103671, 11.365605, 3.37785, 1.4599704, 2.9243639]),
    ('flan-t5-xl', [2.5323708, 2.9281907, 3.2239344, 10.614168, 7.162341, 3.0385818, 2.9526176, 2.7103176]),
    ('flan-t5-xxl', [2.3222983, 2.3133714, 2.8529167, 11.162584, 6.798625, 4.742971, 2.9756427, 2.9877827]),
    ('flan-alpaca-base', [0.5997408, 1.1441187, 1.3299922, 2.1235154, 1.5477583, 0.27742645, 0.3976275, 0.21495701]),
    ('flan-alpaca-large', [0.03638878, 0.07655565, 0.02087213, 11.400998, 9.982766, 0.82122284, 0.42820516, 0.39627305]),
    ('flan-alpaca-xl', [3.2593443, 3.6223898, 3.3259575, 12.523176, 6.452489, 5.2135086, 3.7474098, 3.6356025]),
    ('flan-alpaca-xxl', [2.916435, 5.631528, 3.178902, 11.2796755, 5.902015, 2.294983, 2.8577528, 2.9340065]),
    ('flan-gpt4all-xl', [6.95467, 8.141007, 6.8901677, 7.149359, 7.247072, 7.390025, 5.7526765, 4.9763246]),
    ('flan-sharegpt-xl', [3.0441425, 2.9028635, 3.034965, 5.7231064, 2.282282, 2.5237873, 1.0039636, 1.014216]),
    ('flan-alpaca-gpt4-xl', [5.705884, 6.2532945, 5.6363673, 12.22221, 6.067267, 4.2973313, 4.1460104, 5.088393]),
])
def test_reading_times(model_identifier, expected_reading_times):
    """Per-word reading times on a fixed sentence should match recorded values."""
    subject = load_model(model_identifier)
    subject.start_behavioral_task(task=ArtificialSubject.Task.reading_times)
    words = ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy']
    measured = subject.digest_text(words)['behavior']
    # loose tolerance: values vary slightly across hardware/driver versions
    np.testing.assert_allclose(measured, expected_reading_times, atol=0.01)


@pytest.mark.memory_intense
@pytest.mark.parametrize('model_identifier, expected_next_words', [
    ('t5-small', ['in', 'in', 'in']),
    ('t5-base', ['<extra_id_27>', '</s>', '<extra_id_27>']),
    ('t5-large', ['<extra_id_11>', '<extra_id_11>', '<extra_id_11>']),
    ('t5-xl', ['', '', '']),
    ('t5-xxl', ['', 'ES', ',']),
    ('flan-t5-small', ['...', '...', '...']),
    ('flan-t5-base', ['</s>', '...', '</s>']),
    ('flan-t5-large', ['', '', '']),
    ('flan-t5-xl', ['', '...', '</s>']),
    ('flan-t5-xxl', ['</s>', '.', '.']),
    ('flan-alpaca-base', ['</s>', '</s>', '</s>']),
    ('flan-alpaca-large', ['', '</s>', '</s>']),
    ('flan-alpaca-xl', ['', '.', '.']),
    ('flan-alpaca-xxl', ['.', '.', '.']),
    ('flan-gpt4all-xl', ['', '', '']),
    ('flan-sharegpt-xl', ['the', '</s>', '</s>']),
    ('flan-alpaca-gpt4-xl', ['', '</s>', '</s>']),
])
def test_next_word(model_identifier, expected_next_words):
    """Each context chunk should be continued with the recorded next token.

    Note the expected tokens include sentinel/EOS strings (e.g. '</s>',
    '<extra_id_NN>') — T5 variants are not trained for free-running LM
    continuation, so these are regression values, not "good" predictions.
    """
    subject = load_model(model_identifier)
    subject.start_behavioral_task(task=ArtificialSubject.Task.next_word)
    contexts = ['the quick brown fox', 'jumps over', 'the lazy']
    predictions = subject.digest_text(contexts)['behavior']
    np.testing.assert_array_equal(predictions, expected_next_words)


@pytest.mark.memory_intense
@pytest.mark.parametrize('model_identifier, feature_size', [
    ('t5-small', 512),
    ('t5-base', 768),
    ('t5-large', 1024),
    ('t5-xl', 2048),
    ('t5-xxl', 4096),
    ('flan-t5-small', 512),
    ('flan-t5-base', 768),
    ('flan-t5-large', 1024),
    ('flan-t5-xl', 2048),
    ('flan-t5-xxl', 4096),
    ('flan-alpaca-base', 768),
    ('flan-alpaca-large', 1024),
    ('flan-alpaca-xl', 2048),
    ('flan-alpaca-xxl', 4096),
    ('flan-gpt4all-xl', 2048),
    ('flan-sharegpt-xl', 2048),
    ('flan-alpaca-gpt4-xl', 2048),
])
def test_neural(model_identifier, feature_size):
    """fMRI-style recordings should have one row per passage and `feature_size` neuroids."""
    passages = ['the quick brown fox', 'jumps over', 'the lazy dog']
    subject = load_model(model_identifier)
    subject.start_neural_recording(recording_target=ArtificialSubject.RecordingTarget.language_system,
                                   recording_type=ArtificialSubject.RecordingType.fMRI)
    recordings = subject.digest_text(passages)['neural']
    # one presentation per passage, stimuli preserved verbatim
    assert len(recordings['presentation']) == len(passages)
    np.testing.assert_array_equal(recordings['stimulus'], passages)
    assert len(recordings['neuroid']) == feature_size
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies = [
"transformers>=4.11.3",
"gensim",
"joblib",
"accelerate",
# submission dependencies
"requests"
]
Expand Down