5 changes: 3 additions & 2 deletions brainscore_language/benchmarks/blank2014/__init__.py
@@ -1,4 +1,5 @@
from brainscore_language import benchmark_registry
-from .benchmark import Blank2014Linear
+from .benchmark import Blank2014_ridge, Blank2014_linear

-benchmark_registry['Blank2014-linear'] = Blank2014Linear
+benchmark_registry['Blank2014-ridge'] = Blank2014_ridge
+benchmark_registry['Blank2014-linear'] = Blank2014_linear
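Note: once registered, both variants are loadable by registry key. A minimal usage sketch (not part of this diff; assumes `brainscore_language` exposes the `load_benchmark` accessor that resolves `benchmark_registry` entries):

```python
# Hypothetical consumer code, not part of this PR.
from brainscore_language import load_benchmark

ridge_benchmark = load_benchmark('Blank2014-ridge')    # Blank2014 with the ridge metric
linear_benchmark = load_benchmark('Blank2014-linear')  # Blank2014 with the linear metric
```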
43 changes: 35 additions & 8 deletions brainscore_language/benchmarks/blank2014/benchmark.py
@@ -9,7 +9,27 @@
from brainscore_language.utils.ceiling import ceiling_normalize


-class Blank2014Linear(BenchmarkBase):
+def Blank2014_ridge():
+    return Blank2014(metric="ridge_pearsonr",
+                     cross_validation_kwargs=dict(
+                         splits=8,
+                         split_coord="story",
+                         kfold="group",
+                         random_state=1234
+                     )
+                     )
+
+def Blank2014_linear():
+    return Blank2014(metric="linear_pearsonr",
+                     cross_validation_kwargs=dict(
+                         splits=8,
+                         split_coord="story",
+                         kfold="group",
+                         random_state=1234
+                     )
+                     )
+
+class Blank2014(BenchmarkBase):
"""
Evaluate model ability to predict neural activity in human language system functional regions of interest (fROIs)
in response to natural stories, recorded by Blank et al. 2014.
@@ -20,13 +40,13 @@ class Blank2014Linear(BenchmarkBase):
(e.g. "layer 41 corresponds to the language system"), rather than testing every layer separately.
"""

def __init__(self):
def __init__(self, metric: str, cross_validation_kwargs=None):
self.data = load_dataset('Blank2014.fROI')
self.metric = load_metric('linear_pearsonr')
self.metric = load_metric(metric, crossvalidation_kwargs=cross_validation_kwargs)
ceiler = ExtrapolationCeiling()
ceiling = ceiler(assembly=self.data, metric=self.metric)
super(Blank2014Linear, self).__init__(
identifier='Blank2014-linear',
super(Blank2014, self).__init__(
identifier=f'Blank2014-{metric}',
version=1,
parent='neural_language',
ceiling=ceiling,
@@ -44,7 +64,14 @@ def __call__(self, candidate: ArtificialSubject) -> Score:
            story_predictions = candidate.digest_text(story_stimuli.values)['neural']
            story_predictions['stimulus_id'] = 'presentation', story_stimuli['stimulus_id'].values
            predictions.append(story_predictions)

+        scores = {}
        predictions = xr.concat(predictions, dim='presentation')
-        raw_score = self.metric(predictions, self.data)
-        score = ceiling_normalize(raw_score, self.ceiling)
-        return score
+        layer_names = np.unique(predictions['layer'].data)
+        layer_names = [layer_names] if isinstance(layer_names, str) else layer_names  # if only one layer, make it a list for consistency
+        for layer_name in layer_names:
+            raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
+            final_score = ceiling_normalize(raw_score, self.ceiling)
+            scores[layer_name] = final_score
+
+        return scores
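Note: `__call__` now returns a dict of ceiling-normalized scores keyed by layer name instead of a single `Score`. A self-contained toy sketch of that pattern, with random data and `xr.corr` standing in for the benchmark's metric and `ceiling_normalize`:

```python
# Toy per-layer scoring loop; xr.corr stands in for self.metric(...) and
# ceiling_normalize(...). Layer names are hypothetical.
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
predictions = xr.DataArray(rng.normal(size=(2, 10, 5)),
                           dims=('layer', 'presentation', 'neuroid'),
                           coords={'layer': ['embedding', 'encoder.h.5']})
target = xr.DataArray(rng.normal(size=(10, 5)), dims=('presentation', 'neuroid'))

scores = {}
for layer_name in np.unique(predictions['layer'].data):
    raw = xr.corr(predictions.sel(layer=layer_name), target, dim='presentation')
    scores[layer_name] = float(raw.mean())  # one summary value per layer
print(scores)
```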
4 changes: 2 additions & 2 deletions brainscore_language/benchmarks/fedorenko2016/__init__.py
@@ -1,5 +1,5 @@
from brainscore_language import benchmark_registry
from .benchmark import Fedorenko2016_ridge, Fedorenko2016_linear

-benchmark_registry['Fedorenko2016-linear'] = Fedorenko2016_linear
-benchmark_registry['Fedorenko2016-ridge'] = Fedorenko2016_ridge
\ No newline at end of file
+benchmark_registry['Fedorenko2016-ridge'] = Fedorenko2016_ridge
+benchmark_registry['Fedorenko2016-linear'] = Fedorenko2016_linear
33 changes: 25 additions & 8 deletions brainscore_language/benchmarks/fedorenko2016/benchmark.py
@@ -9,19 +9,32 @@

from tqdm import tqdm

-def Fedorenko2016_linear():
-    return Fedorenko2016(metric="linear_pearsonr")

def Fedorenko2016_ridge():
-    return Fedorenko2016(metric="ridge_pearsonr")
+    return Fedorenko2016(metric="ridge_pearsonr",
+                         cross_validation_kwargs=dict(
+                             split_coord="sentence_id",
+                             kfold="group",
+                             random_state=1234
+                         )
+                         )

+def Fedorenko2016_linear():
+    return Fedorenko2016(metric="linear_pearsonr",
+                         cross_validation_kwargs=dict(
+                             split_coord="sentence_id",
+                             kfold="group",
+                             random_state=1234
+                         )
+                         )

class Fedorenko2016(BenchmarkBase):

-    def __init__(self, metric: str):
+    def __init__(self, metric: str, cross_validation_kwargs=None):
        self.data = load_dataset('Fedorenko2016.language')

        identifier = f"Fedorenko2016-{metric}"
-        self.metric = load_metric(metric)
+        self.metric = load_metric(metric, crossvalidation_kwargs=cross_validation_kwargs)

        ceiler = ExtrapolationCeiling(subject_column="subject_UID")
        ceiling = ceiler(assembly=self.data, metric=self.metric)
@@ -48,9 +61,13 @@ def __call__(self, candidate: ArtificialSubject):
            sentence_predictions['stimulus_id'] = 'presentation', sentence_stimuli['stimulus_id'].values
            predictions.append(sentence_predictions)

+        scores = {}
        predictions = xr.concat(predictions, dim='presentation')

-        raw_score = self.metric(predictions, self.data)
-        scores = ceiling_normalize(raw_score, self.ceiling)
+        layer_names = np.unique(predictions['layer'].data)
+        layer_names = [layer_names] if isinstance(layer_names, str) else layer_names  # if only one layer, make it a list for consistency
+        for layer_name in layer_names:
+            raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
+            final_score = ceiling_normalize(raw_score, self.ceiling)
+            scores[layer_name] = final_score

        return scores
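Note: `kfold="group"` with `split_coord="sentence_id"` plausibly maps to grouped k-fold splitting, so every presentation of a sentence lands on the same side of each train/test split. A hedged sketch with scikit-learn's `GroupKFold` (the actual splitter is `brainscore_language.utils.transformations.CrossValidation`, which may shuffle with `random_state=1234` and pick split counts differently):

```python
# Hedged sketch of grouped cross-validation; the real CrossValidation
# implementation is not shown in this diff and may differ.
import numpy as np
from sklearn.model_selection import GroupKFold

sentence_id = np.repeat(np.arange(5), 3)  # 5 sentences x 3 presentations each
presentations = np.arange(len(sentence_id))

for train_idx, test_idx in GroupKFold(n_splits=5).split(presentations, groups=sentence_id):
    # no sentence is split across train and test
    assert not set(sentence_id[train_idx]) & set(sentence_id[test_idx])
```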
10 changes: 7 additions & 3 deletions brainscore_language/benchmarks/pereira2018/__init__.py
@@ -1,5 +1,9 @@
from brainscore_language import benchmark_registry
-from .benchmark import Pereira2018_243sentences, Pereira2018_384sentences
+from .benchmark import Pereira2018_243sentences_ridge, Pereira2018_384sentences_ridge
+from .benchmark import Pereira2018_243sentences_linear, Pereira2018_384sentences_linear

-benchmark_registry['Pereira2018.243sentences-linear'] = Pereira2018_243sentences
-benchmark_registry['Pereira2018.384sentences-linear'] = Pereira2018_384sentences
+benchmark_registry['Pereira2018.243sentences-ridge'] = Pereira2018_243sentences_ridge
+benchmark_registry['Pereira2018.384sentences-ridge'] = Pereira2018_384sentences_ridge
+
+benchmark_registry['Pereira2018.243sentences-linear'] = Pereira2018_243sentences_linear
+benchmark_registry['Pereira2018.384sentences-linear'] = Pereira2018_384sentences_linear
95 changes: 74 additions & 21 deletions brainscore_language/benchmarks/pereira2018/benchmark.py
@@ -1,3 +1,4 @@
+import numpy as np
import xarray as xr

from brainscore_core.supported_data_standards.brainio.assemblies import NeuroidAssembly
@@ -8,10 +9,30 @@
from brainscore_language.data.pereira2018 import BIBTEX
from brainscore_language.utils.ceiling import ceiling_normalize
from brainscore_language.utils.s3 import load_from_s3
+from brainscore_language.benchmarks.blank2014.ceiling import ExtrapolationCeiling


-def Pereira2018_243sentences():
-    return _Pereira2018ExperimentLinear(experiment='243sentences', ceiling_s3_kwargs=dict(
+def Pereira2018_243sentences_ridge():
+    return _Pereira2018Experiment(experiment='243sentences', metric="ridge_pearsonr",
+                                  crossvalidation_kwargs=dict(
+                                      split_coord="story",
+                                      kfold="group",
+                                      random_state=1234
+                                  )
+                                  )
+
+def Pereira2018_384sentences_ridge():
+    return _Pereira2018Experiment(experiment='384sentences', metric="ridge_pearsonr",
+                                  crossvalidation_kwargs=dict(
+                                      split_coord="story",
+                                      kfold="group",
+                                      random_state=1234
+                                  )
+                                  )
+
+
+def Pereira2018_243sentences_linear():
+    return _Pereira2018Experiment(experiment='243sentences', metric="linear_pearsonr", ceiling_s3_kwargs=dict(
        version_id='CHl_9aFHIWVnPW_njePfy28yzggKuUPw',
        sha1='5e23de899883828f9c886aec304bc5aa0f58f66c',
        raw_kwargs=dict(
@@ -21,12 +42,17 @@ def Pereira2018_243sentences():
                version_id='XVTo58Po5YrNjTuDIWrmfHI0nbN2MVZa',
                sha1='34ba453dc7e8a19aed18cc9bca160e97b4a80be5'
            )
-        )
-    ))
+        ),
+        ),
+        crossvalidation_kwargs=dict(
+            split_coord="story",
+            kfold="group",
+            random_state=1234
+        )
+    )


-def Pereira2018_384sentences():
-    return _Pereira2018ExperimentLinear(experiment='384sentences', ceiling_s3_kwargs=dict(
+def Pereira2018_384sentences_linear():
+    return _Pereira2018Experiment(experiment='384sentences', metric="linear_pearsonr", ceiling_s3_kwargs=dict(
        version_id='sjlnXr5wXUoGv6exoWu06C4kYI0KpZLk',
        sha1='fc895adc52fd79cea3040961d65d8f736a9d3e29',
        raw_kwargs=dict(
@@ -36,11 +62,17 @@ def Pereira2018_384sentences():
                version_id='m4dq_ouKWZkYtdyNPMSP0p6rqb7wcYpi',
                sha1='fe9fb24b34fd5602e18e34006ac5ccc7d4c825b8'
            )
-        )
-    ))
+        ),
+        ),
+        crossvalidation_kwargs=dict(
+            split_coord="story",
+            kfold="group",
+            random_state=1234
+        )
+    )


-class _Pereira2018ExperimentLinear(BenchmarkBase):
+class _Pereira2018Experiment(BenchmarkBase):
"""
Evaluate model ability to predict neural activity in the human language system in response to natural sentences,
recorded by Pereira et al. 2018.
@@ -57,20 +89,32 @@ class _Pereira2018ExperimentLinear(BenchmarkBase):
    the two ceiling-normalized scores.
    """

-    def __init__(self, experiment: str, ceiling_s3_kwargs: dict):
-        self.data = self._load_data(experiment)
-        self.metric = load_metric('linear_pearsonr')
-        identifier = f'Pereira2018.{experiment}-linear'
-        ceiling = self._load_ceiling(identifier=identifier, **ceiling_s3_kwargs)
-        super(_Pereira2018ExperimentLinear, self).__init__(
+    def __init__(self, experiment: str,
+                 metric: str,
+                 ceiling_s3_kwargs: dict = {},
+                 crossvalidation_kwargs: dict = {},
+                 atlas: str = 'language',
+                 ):
+        self.data = self._load_data(experiment, atlas=atlas)
+        self.metric = load_metric(metric, crossvalidation_kwargs=crossvalidation_kwargs)
+        identifier = f"Pereira2018.{experiment}-{metric.split('_')[0]}"
+        if ceiling_s3_kwargs:
+            ceiling = self._load_ceiling(identifier=identifier, **ceiling_s3_kwargs)
+        else:
+            ceiler = ExtrapolationCeiling(subject_column='subject')
+            ceiling = ceiler(assembly=self.data, metric=self.metric)
+
+        super(_Pereira2018Experiment, self).__init__(
            identifier=identifier,
            version=1,
            parent='Pereira2018-linear',
            ceiling=ceiling,
            bibtex=BIBTEX)

-    def _load_data(self, experiment: str) -> NeuroidAssembly:
-        data = load_dataset('Pereira2018.language')
+    def _load_data(self, experiment: str, atlas: str) -> NeuroidAssembly:
+        lang_data = load_dataset('Pereira2018.language')
+        data = load_dataset(f'Pereira2018.{atlas}')
+        data.coords["presentation"] = lang_data.coords["presentation"]
        data = data.sel(experiment=experiment)  # filter experiment
        data = data.dropna('neuroid')  # not all subjects have done both experiments, drop those that haven't
        data.attrs['identifier'] = f"{data.identifier}.{experiment}"
@@ -94,8 +138,17 @@ def __call__(self, candidate: ArtificialSubject) -> Score:
            passage_stimuli = stimuli[passage_indexer]
            passage_predictions = candidate.digest_text(passage_stimuli.values)['neural']
            passage_predictions['stimulus_id'] = 'presentation', passage_stimuli['stimulus_id'].values
+            passage_predictions['passage_index'] = 'presentation', passage_stimuli['passage_index'].values
+            passage_predictions['story'] = 'presentation', passage_stimuli['story'].values
            predictions.append(passage_predictions)

+        scores = {}
        predictions = xr.concat(predictions, dim='presentation')
-        raw_score = self.metric(predictions, self.data)
-        score = ceiling_normalize(raw_score, self.ceiling)
-        return score
+        layer_names = np.unique(predictions['layer'].data)
+        layer_names = [layer_names] if isinstance(layer_names, str) else layer_names  # if only one layer, make it a list for consistency
+        for layer_name in layer_names:
+            raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
+            final_score = ceiling_normalize(raw_score, self.ceiling)
+            scores[layer_name] = final_score
+
+        return scores
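Note: the two added coordinate assignments attach `passage_index` and `story` along the `presentation` dimension, which the story-grouped cross-validation above relies on. A toy illustration of the xarray idiom:

```python
# Attaching a per-presentation coordinate, same idiom as the
# passage_predictions assignments above (story names are made up).
import numpy as np
import xarray as xr

preds = xr.DataArray(np.zeros((4, 2)), dims=('presentation', 'neuroid'))
preds['story'] = 'presentation', np.array(['story1', 'story1', 'story2', 'story2'])
print(preds['story'].values)  # ['story1' 'story1' 'story2' 'story2']
```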
11 changes: 8 additions & 3 deletions brainscore_language/benchmarks/tuckute2024/benchmark.py
@@ -50,8 +50,13 @@ def __call__(self, candidate: ArtificialSubject):
            sentence_predictions['stimulus_id'] = 'presentation', sentence_stimuli['stimulus_id'].values
            predictions.append(sentence_predictions)

+        scores = {}
        predictions = xr.concat(predictions, dim='presentation')

-        raw_score = self.metric(predictions, self.data)
-        return raw_score
+        layer_names = np.unique(predictions['layer'].data)
+        layer_names = [layer_names] if isinstance(layer_names, str) else layer_names  # if only one layer, make it a list for consistency
+        for layer_name in layer_names:
+            raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
+            scores[layer_name] = raw_score
+
+        return scores

45 changes: 42 additions & 3 deletions brainscore_language/metrics/linear_predictivity/metric.py
@@ -7,6 +7,7 @@
from brainscore_core.supported_data_standards.brainio.assemblies import walk_coords
from brainscore_core.metrics import Score, Metric
from brainscore_language.utils.transformations import CrossValidation
+from brainscore_language.metrics.linear_predictivity.ridgecv_gpu import RidgeGCVTorch


class Defaults:
@@ -102,6 +103,44 @@ def __call__(self, prediction, target) -> Score:
                       dims=neuroid_dims)
        return result

+def pearsonr(x, y):
+    xmean = x.mean(axis=0, keepdims=True)
+    ymean = y.mean(axis=0, keepdims=True)
+
+    xm = x - xmean
+    ym = y - ymean
+
+    normxm = scipy.linalg.norm(xm, axis=0, keepdims=True) + 1e-8
+    normym = scipy.linalg.norm(ym, axis=0, keepdims=True) + 1e-8
+
+    r = ((xm / normxm) * (ym / normym)).sum(axis=0)
+
+    return r
+
+class XarrayCorrelationBatched:
+    def __init__(self, correlation_coord=Defaults.stimulus_coord, neuroid_coord=Defaults.neuroid_coord):
+        self._correlation = pearsonr
+        self._correlation_coord = correlation_coord
+        self._neuroid_coord = neuroid_coord
+
+    def __call__(self, prediction, target):
+        # align
+        prediction = prediction.sortby([self._correlation_coord, self._neuroid_coord])
+        target = target.sortby([self._correlation_coord, self._neuroid_coord])
+        assert np.array(prediction[self._correlation_coord].values == target[self._correlation_coord].values).all()
+        assert np.array(prediction[self._neuroid_coord].values == target[self._neuroid_coord].values).all()
+        # compute correlation per neuroid
+        neuroid_dims = target[self._neuroid_coord].dims
+        assert len(neuroid_dims) == 1
+        prediction = prediction.transpose(..., *neuroid_dims)
+        target = target.transpose(..., *neuroid_dims)
+        correlations = self._correlation(prediction.values, target.values)
+        # package
+        result = Score(correlations,
+                       coords={coord: (dims, values)
+                               for coord, dims, values in walk_coords(target) if dims == neuroid_dims},
+                       dims=neuroid_dims)
+        return result

class CrossRegressedCorrelation(Metric):
    def __init__(self, regression, correlation, crossvalidation_kwargs=None, store_regression_weights=False):
Expand Down Expand Up @@ -158,7 +197,7 @@ def __call__(self, source: DataAssembly, target: DataAssembly) -> Score:
        return self.cross_regressed_correlation(source, target)

def ridge_regression(xarray_kwargs=None):
-    regression = RidgeCV(alphas=np.logspace(-3, 3, 7))
+    regression = RidgeGCVTorch(alphas=np.logspace(-3, 3, 7))
    xarray_kwargs = xarray_kwargs or {}
    regression = XarrayRegression(regression, **xarray_kwargs)
    return regression
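Note: `RidgeGCVTorch` (from `ridgecv_gpu.py`, not shown in this diff) replaces scikit-learn's `RidgeCV` as the estimator handed to `XarrayRegression`, so it only needs to be a `fit`/`predict`-compatible drop-in over the same `alphas` grid. A hedged sketch of that interface contract; the real implementation certainly differs (GPU tensors, efficient generalized cross-validation):

```python
# Interface sketch only: a naive fit/predict-compatible RidgeCV stand-in.
# RidgeGCVTorch itself is not shown in this diff.
import numpy as np
from sklearn.linear_model import Ridge

class NaiveRidgeCV:
    def __init__(self, alphas):
        self.alphas = alphas

    def fit(self, X, y):
        # pick alpha on a simple holdout instead of efficient LOO-GCV
        split = int(0.8 * len(X))

        def holdout_error(alpha):
            model = Ridge(alpha=alpha).fit(X[:split], y[:split])
            return np.mean((model.predict(X[split:]) - y[split:]) ** 2)

        best_alpha = min(self.alphas, key=holdout_error)
        self.model_ = Ridge(alpha=best_alpha).fit(X, y)
        return self

    def predict(self, X):
        return self.model_.predict(X)
```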
@@ -171,7 +210,7 @@ def linear_regression(xarray_kwargs=None):

def pearsonr_correlation(xarray_kwargs=None):
    xarray_kwargs = xarray_kwargs or {}
-    return XarrayCorrelation(scipy.stats.pearsonr, **xarray_kwargs)
+    return XarrayCorrelationBatched(**xarray_kwargs)

def linear_pearsonr(*args, regression_kwargs=None, correlation_kwargs=None, **kwargs):
    regression = linear_regression(regression_kwargs or {})
@@ -181,4 +220,4 @@ def linear_pearsonr(*args, regression_kwargs=None, correlation_kwargs=None, **kwargs):
def ridge_pearsonr(*args, regression_kwargs=None, correlation_kwargs=None, **kwargs):
    regression = ridge_regression(regression_kwargs or {})
    correlation = pearsonr_correlation(correlation_kwargs or {})
-    return CrossRegressedCorrelation(*args, regression=regression, correlation=correlation, **kwargs)
\ No newline at end of file
+    return CrossRegressedCorrelation(*args, regression=regression, correlation=correlation, **kwargs)
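Note: the batched `pearsonr` above computes all neuroid columns in one vectorized pass, with a `1e-8` stabilizer added to the norms, replacing the per-neuroid `scipy.stats.pearsonr` loop inside `XarrayCorrelation`. A quick toy-data check that the two agree (assumes the `pearsonr` defined in this diff is in scope):

```python
# Sanity check: vectorized pearsonr vs. scipy.stats.pearsonr per column.
import numpy as np
import scipy.stats

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 4))
y = rng.normal(size=(100, 4))

batched = pearsonr(x, y)  # the function added in this diff
reference = np.array([scipy.stats.pearsonr(x[:, i], y[:, i])[0] for i in range(x.shape[1])])
assert np.allclose(batched, reference, atol=1e-6)  # tiny drift from the 1e-8 stabilizer
```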