How to do multiple structures mesh-valued regression?#

NB: an alternative way to using a for loop on “How to do mesh-valued regression?.

[1]:

import pyvista as pv
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

import polpo.preprocessing.dict as ppdict
import polpo.preprocessing.pd as ppd
from polpo.models import DictMeshes2Comps, ObjectRegressor
from polpo.preprocessing import (
    Map,
    PartiallyInitializedStep,
)
from polpo.preprocessing.learning import NestedDictsToXY
from polpo.preprocessing.load.pregnancy import (
    DenseMaternalCsvDataLoader,
    DenseMaternalMeshLoader,
)
from polpo.preprocessing.mesh.io import PvReader
from polpo.preprocessing.mesh.registration import PvAlign
from polpo.preprocessing.mri import segmtool2encoding

[KeOps] Warning : cuda was detected, but driver API could not be initialized. Switching to cpu only.

[2]:

STATIC_VIZ = True

if STATIC_VIZ:
    pv.set_jupyter_backend("static")

Loading meshes#

[3]:

tool = "fsl"
subject_id = "01"

encoding = segmtool2encoding(tool)

struct_keys = encoding.structs

n_structs = len(struct_keys)

[4]:

prep_pipe = PartiallyInitializedStep(
    Step=lambda **kwargs: ppdict.DictMap(PvAlign(**kwargs)),
    _target=lambda meshes: meshes[list(meshes.keys())[0]],
    max_iterations=500,
)

[5]:

mesh_loader = ppdict.HashWithIncoming(
    Map(
        PartiallyInitializedStep(
            Step=DenseMaternalMeshLoader,
            pass_data=False,
            subject_id=subject_id,
            _struct=lambda name: name.split("_")[-1],
            _left=lambda name: name.split("_")[0] == "L",
            as_dict=True,
        )
        + ppdict.DictMap(PvReader())
    )
)

pipe = mesh_loader + ppdict.DictMap(prep_pipe)

meshes = pipe(struct_keys)

Loading tabular data#

[6]:

pilot = subject_id == "01"

pipe = DenseMaternalCsvDataLoader(pilot=pilot, subject_id=subject_id)

df = pipe()

INFO: Data has already been downloaded... using cached file ('/home/luisfpereira/.herbrain/data/maternal/28Baby_Hormones.csv').

Here, we filter the tabular data.

[7]:

session_selector = ppd.DfIsInFilter("stage", ["post"], negate=True)

predictor_selector = (
    session_selector + ppd.ColumnsSelector("gestWeek") + ppd.SeriesToDict()
)

[8]:

x_dict = predictor_selector(df)

Merge data#

We get the data in the proper format for fitting.

[9]:

dict_pipe = NestedDictsToXY()

# meshes_ : dict[list]
X, meshes_ = dict_pipe([x_dict, meshes])

Create and fit regressor#

[10]:

pca = PCA(n_components=4)

objs2y = DictMeshes2Comps(n_pipes=n_structs, dim_reduction=pca)

[11]:

model = ObjectRegressor(LinearRegression(fit_intercept=True), objs2y=objs2y)

[12]:

model.fit(X, meshes_)

[12]:

Evaluate fit#

model.predict outputs meshes, but we know LinearRegression sees PCA components. We can evaluate r2_score by applying transform.

NB: these are values on the training data.

[13]:

meshes_pred = model.predict(X)

y_true = objs2y.transform(meshes_)
y_pred = objs2y.transform(meshes_pred)

scores = r2_score(y_true, y_pred, multioutput="raw_values")

dict(zip(struct_keys, scores.reshape(n_structs, -1)))

[13]:

{'BrStem': array([0.43173305, 0.00093647, 0.12750477, 0.02389184]),
 'L_Thal': array([0.01415542, 0.53246225, 0.01803256, 0.1944647 ]),
 'R_Thal': array([0.29409483, 0.19884927, 0.0071038 , 0.04048802]),
 'L_Caud': array([0.19577605, 0.43087542, 0.03419801, 0.0149218 ]),
 'R_Caud': array([0.28438227, 0.25672121, 0.06444696, 0.03906288]),
 'L_Puta': array([0.66647759, 0.0129195 , 0.03550186, 0.02858566]),
 'R_Puta': array([0.3135088 , 0.13438528, 0.1297084 , 0.13376509]),
 'L_Pall': array([0.35915277, 0.04855107, 0.24886398, 0.05712195]),
 'R_Pall': array([0.52617963, 0.0208839 , 0.06184913, 0.07608356]),
 'L_Hipp': array([0.04356306, 0.00644147, 0.24583467, 0.00145252]),
 'R_Hipp': array([0.03846718, 0.06083203, 0.25813068, 0.0021323 ]),
 'L_Amyg': array([0.00954856, 0.25933572, 0.39715277, 0.00524614]),
 'R_Amyg': array([0.31313141, 0.10727703, 0.01111522, 0.00072597]),
 'L_Accu': array([3.66642523e-01, 9.99478421e-03, 5.68751851e-02, 1.89862875e-04]),
 'R_Accu': array([0.2658962 , 0.01162296, 0.01511773, 0.20374928])}