5 changes: 3 additions & 2 deletions brainscore_language/benchmarks/blank2014/__init__.py
@@ -1,4 +1,5 @@
from brainscore_language import benchmark_registry
from .benchmark import Blank2014Linear
from .benchmark import Blank2014_ridge, Blank2014_linear

benchmark_registry['Blank2014-linear'] = Blank2014Linear
benchmark_registry['Blank2014-ridge'] = Blank2014_ridge
benchmark_registry['Blank2014-linear'] = Blank2014_linear
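
For reference, a minimal usage sketch of the renamed registry entries (assuming, as elsewhere in brainscore_language, that load_benchmark resolves an identifier by calling the registered factory):

from brainscore_language import load_benchmark

ridge = load_benchmark('Blank2014-ridge')    # constructs Blank2014(metric='ridge_pearsonr', ...)
linear = load_benchmark('Blank2014-linear')  # constructs Blank2014(metric='linear_pearsonr')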
42 changes: 35 additions & 7 deletions brainscore_language/benchmarks/blank2014/benchmark.py
@@ -1,3 +1,4 @@
import numpy as np
import xarray as xr

from brainscore_core.benchmarks import BenchmarkBase
@@ -9,7 +10,20 @@
from brainscore_language.utils.ceiling import ceiling_normalize


class Blank2014Linear(BenchmarkBase):
def Blank2014_ridge():
return Blank2014(metric="ridge_pearsonr",
cross_validation_kwargs=dict(
splits=8,
split_coord="story",
kfold="group",
random_state=1234
)
)

def Blank2014_linear():
return Blank2014(metric="linear_pearsonr")

class Blank2014(BenchmarkBase):
"""
Evaluate model ability to predict neural activity in human language system functional regions of interest (fROIs)
in response to natural stories, recorded by Blank et al. 2014.
@@ -20,13 +34,13 @@ class Blank2014Linear(BenchmarkBase):
(e.g. "layer 41 corresponds to the language system"), rather than testing every layer separately.
"""

def __init__(self):
def __init__(self, metric: str, cross_validation_kwargs=None):
self.data = load_dataset('Blank2014.fROI')
self.metric = load_metric('linear_pearsonr')
self.metric = load_metric(metric, crossvalidation_kwargs=cross_validation_kwargs)
ceiler = ExtrapolationCeiling()
ceiling = ceiler(assembly=self.data, metric=self.metric)
super(Blank2014Linear, self).__init__(
identifier='Blank2014-linear',
super(Blank2014, self).__init__(
identifier=f'Blank2014-{metric}',
version=1,
parent='neural_language',
ceiling=ceiling,
@@ -43,8 +57,22 @@ def __call__(self, candidate: ArtificialSubject) -> Score:
story_stimuli = stimuli[story_indexer]
story_predictions = candidate.digest_text(story_stimuli.values)['neural']
story_predictions['stimulus_id'] = 'presentation', story_stimuli['stimulus_id'].values
try:
story_predictions['story']
except KeyError:
story_predictions['story'] = 'presentation', story_stimuli['story'].values
predictions.append(story_predictions)

predictions = xr.concat(predictions, dim='presentation')
raw_score = self.metric(predictions, self.data)
score = ceiling_normalize(raw_score, self.ceiling)
layer_names = np.unique(predictions['layer'].data)
layer_names = [layer_names] if isinstance(layer_names, str) else layer_names
layer_scores = {}
for layer_name in layer_names:
raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
layer_scores[layer_name] = ceiling_normalize(raw_score, self.ceiling)

score = Score(np.mean(list(layer_scores.values())))
score.attrs['layer_scores'] = layer_scores
score.attrs['raw'] = Score(np.mean([s.attrs['raw'] for s in layer_scores.values()]))
score.attrs['ceiling'] = self.ceiling
return score
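
The new __call__ body swaps the single pooled score for a per-layer loop. The same pattern as a self-contained helper (a sketch, not part of the PR; score_per_layer is a hypothetical name, and it assumes, as the PR does, that ceiling_normalize stores the un-normalized score under attrs['raw']):

import numpy as np
import xarray as xr

from brainscore_core.metrics import Score
from brainscore_language.utils.ceiling import ceiling_normalize


def score_per_layer(predictions: xr.DataArray, data, metric, ceiling) -> Score:
    # Score each model layer separately, then average the ceiling-normalized results.
    layer_scores = {}
    for layer_name in np.unique(predictions['layer'].data):
        raw = metric(predictions.sel(layer=layer_name), data)
        layer_scores[layer_name] = ceiling_normalize(raw, ceiling)
    score = Score(np.mean(list(layer_scores.values())))
    score.attrs['layer_scores'] = layer_scores
    score.attrs['raw'] = Score(np.mean([s.attrs['raw'] for s in layer_scores.values()]))
    return score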
4 changes: 3 additions & 1 deletion brainscore_language/benchmarks/blank2014/test.py
@@ -33,7 +33,8 @@ def activity_for_text(stimuli: Union[str, List[str]]) -> NeuroidAssembly:
coords={'stimulus_seq': ('presentation', np.arange(num_stimuli)),
'stimulus_num': ('presentation', np.arange(num_stimuli)),
'neuroid_id': ('neuroid', np.arange(num_neuroids)),
'region': ('neuroid', ['some_region'] * num_neuroids)},
'region': ('neuroid', ['some_region'] * num_neuroids),
'layer': ('neuroid', ['test_layer'] * num_neuroids)},
dims=['presentation', 'neuroid'])
neural_activity['stimulus'] = 'presentation', stimuli # copy over
return neural_activity
@@ -53,6 +54,7 @@ def activity_for_text(stimuli: Union[str, List[str]]) -> NeuroidAssembly:
# remove stimulus_id and stimulus coordinates to not trip up benchmark
passage_activity = passage_activity.reset_index('presentation')
del passage_activity['stimulus_id']
passage_activity['layer'] = 'neuroid', ['test_layer'] * passage_activity.sizes['neuroid']
passage_activity = NeuroidAssembly(passage_activity) # index
return passage_activity

4 changes: 2 additions & 2 deletions brainscore_language/benchmarks/fedorenko2016/__init__.py
@@ -1,5 +1,5 @@
from brainscore_language import benchmark_registry
from .benchmark import Fedorenko2016_ridge, Fedorenko2016_linear

benchmark_registry['Fedorenko2016-linear'] = Fedorenko2016_linear
benchmark_registry['Fedorenko2016-ridge'] = Fedorenko2016_ridge
benchmark_registry['Fedorenko2016-ridge'] = Fedorenko2016_ridge
benchmark_registry['Fedorenko2016-linear'] = Fedorenko2016_linear
38 changes: 29 additions & 9 deletions brainscore_language/benchmarks/fedorenko2016/benchmark.py
@@ -1,6 +1,8 @@
import numpy as np
import xarray as xr

from brainscore_core.benchmarks import BenchmarkBase
from brainscore_core.metrics import Score
from brainscore_language import load_dataset, load_metric
from brainscore_language.artificial_subject import ArtificialSubject
from brainscore_language.benchmarks.blank2014.ceiling import ExtrapolationCeiling
@@ -9,19 +11,26 @@

from tqdm import tqdm

def Fedorenko2016_linear():
return Fedorenko2016(metric="linear_pearsonr")

def Fedorenko2016_ridge():
return Fedorenko2016(metric="ridge_pearsonr")
return Fedorenko2016(metric="ridge_pearsonr",
cross_validation_kwargs=dict(
split_coord="sentence_id",
kfold="group",
random_state=1234
)
)

def Fedorenko2016_linear():
return Fedorenko2016(metric="linear_pearsonr")

class Fedorenko2016(BenchmarkBase):

def __init__(self, metric: str):
def __init__(self, metric: str, cross_validation_kwargs=None):
self.data = load_dataset('Fedorenko2016.language')

identifier = f"Fedorenko2016-{metric}"
self.metric = load_metric(metric)
self.metric = load_metric(metric, crossvalidation_kwargs=cross_validation_kwargs)

ceiler = ExtrapolationCeiling(subject_column="subject_UID")
ceiling = ceiler(assembly=self.data, metric=self.metric)
@@ -46,11 +55,22 @@ def __call__(self, candidate: ArtificialSubject):
stimuli_values = sentence_stimuli.values
sentence_predictions = candidate.digest_text(stimuli_values)["neural"]
sentence_predictions['stimulus_id'] = 'presentation', sentence_stimuli['stimulus_id'].values
try:
sentence_predictions['sentence_id']
except KeyError:
sentence_predictions['sentence_id'] = 'presentation', sentence_stimuli['sentence_id'].values
predictions.append(sentence_predictions)

predictions = xr.concat(predictions, dim='presentation')
layer_names = np.unique(predictions['layer'].data)
layer_names = [layer_names] if isinstance(layer_names, str) else layer_names
layer_scores = {}
for layer_name in layer_names:
raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
layer_scores[layer_name] = ceiling_normalize(raw_score, self.ceiling)

raw_score = self.metric(predictions, self.data)
scores = ceiling_normalize(raw_score, self.ceiling)

return scores
score = Score(np.mean(list(layer_scores.values())))
score.attrs['layer_scores'] = layer_scores
score.attrs['raw'] = Score(np.mean([s.attrs['raw'] for s in layer_scores.values()]))
score.attrs['ceiling'] = self.ceiling
return score
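
The ridge variant cross-validates with kfold="group" and split_coord="sentence_id", so all presentations of a sentence land on the same side of each split. A toy sketch of what that grouping implies, assuming the metric ultimately delegates to a grouped splitter along the lines of scikit-learn's GroupKFold (the actual splitter lives inside the ridge_pearsonr metric, and GroupKFold itself is deterministic; the random_state=1234 presumably seeds shuffling there):

import numpy as np
from sklearn.model_selection import GroupKFold

groups = np.array([0, 0, 1, 1, 2, 2])  # toy labels: three sentences, two presentations each
features = np.zeros((len(groups), 1))  # placeholder design matrix
for train_idx, test_idx in GroupKFold(n_splits=3).split(features, groups=groups):
    # A sentence never straddles the split, so the ridge fit is evaluated
    # on entirely held-out sentences.
    print(train_idx, test_idx)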
10 changes: 7 additions & 3 deletions brainscore_language/benchmarks/pereira2018/__init__.py
@@ -1,5 +1,9 @@
from brainscore_language import benchmark_registry
from .benchmark import Pereira2018_243sentences, Pereira2018_384sentences
from .benchmark import Pereira2018_243sentences_ridge, Pereira2018_384sentences_ridge
from .benchmark import Pereira2018_243sentences_linear, Pereira2018_384sentences_linear

benchmark_registry['Pereira2018.243sentences-linear'] = Pereira2018_243sentences
benchmark_registry['Pereira2018.384sentences-linear'] = Pereira2018_384sentences
benchmark_registry['Pereira2018.243sentences-ridge'] = Pereira2018_243sentences_ridge
benchmark_registry['Pereira2018.384sentences-ridge'] = Pereira2018_384sentences_ridge

benchmark_registry['Pereira2018.243sentences-linear'] = Pereira2018_243sentences_linear
benchmark_registry['Pereira2018.384sentences-linear'] = Pereira2018_384sentences_linear
92 changes: 72 additions & 20 deletions brainscore_language/benchmarks/pereira2018/benchmark.py
@@ -1,3 +1,4 @@
import numpy as np
import xarray as xr

from brainscore_core.supported_data_standards.brainio.assemblies import NeuroidAssembly
@@ -8,10 +9,30 @@
from brainscore_language.data.pereira2018 import BIBTEX
from brainscore_language.utils.ceiling import ceiling_normalize
from brainscore_language.utils.s3 import load_from_s3
from brainscore_language.benchmarks.blank2014.ceiling import ExtrapolationCeiling


def Pereira2018_243sentences():
return _Pereira2018ExperimentLinear(experiment='243sentences', ceiling_s3_kwargs=dict(
def Pereira2018_243sentences_ridge():
return _Pereira2018Experiment(experiment='243sentences', metric="ridge_pearsonr",
crossvalidation_kwargs=dict(
split_coord="story",
kfold="group",
random_state=1234
)
)

def Pereira2018_384sentences_ridge():
return _Pereira2018Experiment(experiment='384sentences', metric="ridge_pearsonr",
crossvalidation_kwargs=dict(
split_coord="story",
kfold="group",
random_state=1234
)
)


def Pereira2018_243sentences_linear():
return _Pereira2018Experiment(experiment='243sentences', metric="linear_pearsonr", ceiling_s3_kwargs=dict(
version_id='CHl_9aFHIWVnPW_njePfy28yzggKuUPw',
sha1='5e23de899883828f9c886aec304bc5aa0f58f66c',
raw_kwargs=dict(
@@ -21,12 +42,12 @@ def Pereira2018_243sentences():
version_id='XVTo58Po5YrNjTuDIWrmfHI0nbN2MVZa',
sha1='34ba453dc7e8a19aed18cc9bca160e97b4a80be5'
)
)
))
),
)
)


def Pereira2018_384sentences():
return _Pereira2018ExperimentLinear(experiment='384sentences', ceiling_s3_kwargs=dict(
def Pereira2018_384sentences_linear():
return _Pereira2018Experiment(experiment='384sentences', metric="linear_pearsonr", ceiling_s3_kwargs=dict(
version_id='sjlnXr5wXUoGv6exoWu06C4kYI0KpZLk',
sha1='fc895adc52fd79cea3040961d65d8f736a9d3e29',
raw_kwargs=dict(
@@ -36,11 +57,12 @@ def Pereira2018_384sentences():
version_id='m4dq_ouKWZkYtdyNPMSP0p6rqb7wcYpi',
sha1='fe9fb24b34fd5602e18e34006ac5ccc7d4c825b8'
)
)
))
),
)
)


class _Pereira2018ExperimentLinear(BenchmarkBase):
class _Pereira2018Experiment(BenchmarkBase):
"""
Evaluate model ability to predict neural activity in the human language system in response to natural sentences,
recorded by Pereira et al. 2018.
@@ -57,20 +79,32 @@ class _Pereira2018ExperimentLinear(BenchmarkBase):
the two ceiling-normalized scores.
"""

def __init__(self, experiment: str, ceiling_s3_kwargs: dict):
self.data = self._load_data(experiment)
self.metric = load_metric('linear_pearsonr')
identifier = f'Pereira2018.{experiment}-linear'
ceiling = self._load_ceiling(identifier=identifier, **ceiling_s3_kwargs)
super(_Pereira2018ExperimentLinear, self).__init__(
def __init__(self, experiment: str,
metric: str,
ceiling_s3_kwargs: dict = {},
crossvalidation_kwargs: dict = {},
atlas: str = 'language',
):
self.data = self._load_data(experiment, atlas=atlas)
self.metric = load_metric(metric, crossvalidation_kwargs=crossvalidation_kwargs)
identifier = f"Pereira2018.{experiment}-{metric.split('_')[0]}"
if ceiling_s3_kwargs:
ceiling = self._load_ceiling(identifier=identifier, **ceiling_s3_kwargs)
else:
ceiler = ExtrapolationCeiling(subject_column='subject')
ceiling = ceiler(assembly=self.data, metric=self.metric)

super(_Pereira2018Experiment, self).__init__(
identifier=identifier,
version=1,
parent='Pereira2018-linear',
ceiling=ceiling,
bibtex=BIBTEX)

def _load_data(self, experiment: str) -> NeuroidAssembly:
data = load_dataset('Pereira2018.language')
def _load_data(self, experiment: str, atlas: str) -> NeuroidAssembly:
lang_data = load_dataset('Pereira2018.language')
data = load_dataset(f'Pereira2018.{atlas}')
data.coords["presentation"] = lang_data.coords["presentation"]
data = data.sel(experiment=experiment) # filter experiment
data = data.dropna('neuroid') # not all subjects have done both experiments, drop those that haven't
data.attrs['identifier'] = f"{data.identifier}.{experiment}"
@@ -94,8 +128,26 @@ def __call__(self, candidate: ArtificialSubject) -> Score:
passage_stimuli = stimuli[passage_indexer]
passage_predictions = candidate.digest_text(passage_stimuli.values)['neural']
passage_predictions['stimulus_id'] = 'presentation', passage_stimuli['stimulus_id'].values
try:
passage_predictions['passage_index']
except KeyError:
passage_predictions['passage_index'] = 'presentation', passage_stimuli['passage_index'].values
try:
passage_predictions['story']
except KeyError:
passage_predictions['story'] = 'presentation', passage_stimuli['story'].values
predictions.append(passage_predictions)

predictions = xr.concat(predictions, dim='presentation')
raw_score = self.metric(predictions, self.data)
score = ceiling_normalize(raw_score, self.ceiling)
layer_names = np.unique(predictions['layer'].data)
layer_names = [layer_names] if isinstance(layer_names, str) else layer_names
layer_scores = {}
for layer_name in layer_names:
raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
layer_scores[layer_name] = ceiling_normalize(raw_score, self.ceiling)

score = Score(np.mean(list(layer_scores.values())))
score.attrs['layer_scores'] = layer_scores
score.attrs['raw'] = Score(np.mean([s.attrs['raw'] for s in layer_scores.values()]))
score.attrs['ceiling'] = self.ceiling
return score
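
The try/except KeyError blocks above copy presentation-level coordinates ('passage_index', 'story') from the stimuli onto the predictions only when the model did not already supply them. The same idiom as a small standalone helper (hypothetical; the PR inlines it at each call site):

import xarray as xr


def ensure_presentation_coord(predictions: xr.DataArray, stimuli: xr.DataArray, coord: str) -> xr.DataArray:
    # Copy a presentation-level coordinate from the stimuli onto the
    # predictions, but only if the predictions do not already carry it.
    if coord not in predictions.coords:
        predictions[coord] = 'presentation', stimuli[coord].values
    return predictions

With such a helper, each block collapses to a one-liner, e.g. ensure_presentation_coord(passage_predictions, passage_stimuli, 'story').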
4 changes: 3 additions & 1 deletion brainscore_language/benchmarks/pereira2018/test.py
@@ -37,7 +37,8 @@ def activity_for_text(stimuli: Union[str, List[str]]) -> NeuroidAssembly:
coords={'stimulus_seq': ('presentation', np.arange(num_stimuli)),
'stimulus_num': ('presentation', np.arange(num_stimuli)),
'neuroid_id': ('neuroid', np.arange(25)),
'region': ('neuroid', ['some_region'] * 25)},
'region': ('neuroid', ['some_region'] * 25),
'layer': ('neuroid', ['test_layer'] * 25)},
dims=['presentation', 'neuroid'])
neural_activity['stimulus'] = 'presentation', stimuli # copy over
return neural_activity
@@ -61,6 +62,7 @@ def activity_for_text(stimuli: Union[str, List[str]]) -> NeuroidAssembly:
# remove stimulus_id and stimulus coordinates to not trip up benchmark
passage_activity = passage_activity.reset_index('presentation')
del passage_activity['stimulus_id']
passage_activity['layer'] = 'neuroid', ['test_layer'] * passage_activity.sizes['neuroid']
passage_activity = NeuroidAssembly(passage_activity) # index
return passage_activity

14 changes: 11 additions & 3 deletions brainscore_language/benchmarks/tuckute2024/benchmark.py
@@ -1,3 +1,4 @@
import numpy as np
import xarray as xr

from brainscore_core.benchmarks import BenchmarkBase
@@ -51,7 +52,14 @@ def __call__(self, candidate: ArtificialSubject):
predictions.append(sentence_predictions)

predictions = xr.concat(predictions, dim='presentation')

raw_score = self.metric(predictions, self.data)
return raw_score
layer_names = np.unique(predictions['layer'].data)
layer_names = [layer_names] if isinstance(layer_names, str) else layer_names
layer_scores = {}
for layer_name in layer_names:
raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
layer_scores[layer_name] = raw_score

score = Score(np.mean(list(layer_scores.values())))
score.attrs['layer_scores'] = layer_scores
return score
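
Across all four benchmarks, the returned Score now exposes its per-layer breakdown through attrs. A sketch of inspecting it after a run (benchmark and candidate stand in for any registered benchmark and ArtificialSubject, and it assumes each per-layer entry is a scalar Score, as constructed above):

score = benchmark(candidate)             # overall score: mean over layers
per_layer = score.attrs['layer_scores']  # dict: layer name -> Score
best_layer = max(per_layer, key=lambda name: float(per_layer[name]))
print(best_layer, float(per_layer[best_layer]))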
