LDDMM: PLS regression#
Uses the results (control points and momenta) produced by the notebook "LDDMM: how to estimate a deterministic atlas?".
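Purpose: fit a partial least squares (PLS) regression that predicts gestational week from the flattened atlas momenta, and report the R2 score on a training split and on a held-out split.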
[1]:
from pathlib import Path
import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import train_test_split
import polpo.lddmm as plddmm
import polpo.preprocessing.pd as ppd
from polpo.model_eval import (
R2Score,
collect_eval_results,
)
from polpo.preprocessing.load.pregnancy.jacobs import TabularDataLoader
from polpo.preprocessing.np import FlattenButFirst
from polpo.sklearn.adapter import EvaluatedModel
No CUDA runtime is found, using CUDA_HOME='/usr'
[2]:
# Folder holding the results to reuse (presumably written by the atlas-estimation
# notebook); the atlas files themselves live in its "atlas" subfolder.
DATA_DIR = Path("results", "atlas_example")
ATLAS_DIR = DATA_DIR.joinpath("atlas")
[3]:
# Control points of the atlas stored in ATLAS_DIR.
cp = plddmm.io.load_cp(ATLAS_DIR)

# Load the momenta and flatten them in one go. Per the recorded output the
# result has one row per observation and 72 columns, which matches
# 24 control points x 3 coordinates.
momenta = FlattenButFirst()(plddmm.io.load_momenta(ATLAS_DIR))

cp.shape, momenta.shape
[3]:
((24, 3), (26, 72))
[4]:
# Tabular data restricted to subject "01", indexed by session.
data = TabularDataLoader(
    subject_subset=["01"],
    index_by_session=True,
)()
INFO: Data has already been downloaded... using cached file ('/home/luisfpereira/.herbrain/data/maternal/maternal_brain_project_pilot/rawdata/28Baby_Hormones.csv').
[5]:
# Pull the "gestWeek" column out of the table as a mapping; only its values are
# used below (presumably keyed by session, since the loader indexes by session).
data_ = ppd.ColumnToDict("gestWeek")(data)
[6]:
# Gestational week as a single-column 2D array (one row per entry of `data_`).
# NOTE(review): assumes the order of `data_` matches the row order of
# `momenta` -- confirm.
x = np.array(list(data_.values()))[:, np.newaxis]

# Flattened momenta, one row per observation.
y = momenta

x.shape, y.shape
[6]:
((26, 1), (26, 72))
[7]:
# PLS regressor with scikit-learn's default settings, paired with an R2 scorer
# (presumably evaluated whenever the wrapped model is fit or used to predict).
model = EvaluatedModel(PLSRegression(), R2Score())
[8]:
# NOTE(review): the arrays are passed as (y, x), so the model's predictors
# (X_*) are the flattened momenta and its response (y_*) is gestational week,
# i.e. the opposite of what the names `x` / `y` suggest. This looks
# intentional -- confirm.
# shuffle=False keeps the original row order: the first 80% of the rows are
# used for training and the remaining rows are held out.
X_train, X_test, y_train, y_test = train_test_split(
    y, x, train_size=0.8, shuffle=False
)

model.fit(X_train, y_train);
[9]:
# Collect the evaluation results stored on the model (presumably the
# training-set ones, since the test-set call further down passes train=False).
eval_res_train = collect_eval_results(
    model,
    unnest=True,
    outer_key="obj_regr",
)
print(list(eval_res_train))
['obj_regr']
[10]:
# Show the collected results for the fitted (training) data.
eval_res_train
[10]:
{'obj_regr': {'r2': array([0.71588517])}}
[11]:
# Predict on the held-out rows and evaluate against their targets; the trailing
# semicolon suppresses the cell output.
model.predict_eval(X_test, y_test);
[12]:
# Same collection as for the training results, but with train=False to get the
# results of the held-out evaluation.
eval_res_test = collect_eval_results(
    model,
    unnest=True,
    outer_key="obj_regr",
    train=False,
)
print(list(eval_res_test))
['obj_regr']
[13]:
# Show the held-out results.
# NOTE(review): assuming R2Score follows the usual coefficient-of-determination
# definition, a negative value means the predictions are worse than always
# predicting the mean of the held-out targets; the split is unshuffled, so the
# held-out rows are the last ones in the data -- worth interpreting in prose.
eval_res_test
[13]:
{'obj_regr': {'r2': array([-2.32858353])}}