MFPCA of 1-dimensional sparse data#

Example of multivariate functional principal components analysis of 1-dimensional data.

# Author: Steven Golovkine <steven_golovkine@icloud.com>
# License: MIT

# Load packages
import matplotlib.pyplot as plt
import numpy as np

from FDApy.representation import DenseArgvals
from FDApy.simulation import KarhunenLoeve
from FDApy.preprocessing import MFPCA
from FDApy.visualization import plot, plot_multivariate

# Set general parameters
rng = 42
n_obs = 50
idx = 5
colors = np.array([[0.5, 0, 0, 1]])


# Parameters of the basis
name = ["bsplines", "fourier"]
n_functions = [5, 5]
argvals = [
    DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}),
    DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}),
]

We simulate \(N = 50\) curves of a 2-dimensional process. The first component of the process is defined on the one-dimensional observation grid \(\{0, 0.01, 0.02, \cdots, 1\}\), based on the first \(K = 5\) B-splines basis functions on \([0, 1]\) and the variance of the scores random variables equal to \(1\). The second component of the process is defined on the one-dimensional observation grid \(\{0, 0.01, 0.02, \cdots, 1\}\), based on the first \(K = 5\) Fourier basis functions on \([0, 1]\) and the variance of the scores random variables equal to \(1\).

kl = KarhunenLoeve(
    basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng
)
kl.new(n_obs=n_obs)
kl.add_noise_and_sparsify(noise_variance=0.05, percentage=0.5, epsilon=0.05)
data = kl.sparse_data

_ = plot_multivariate(data)
plot mfpca 1d sparse

Covariance decomposition#

# Perform multivariate FPCA with an estimation of the number of components by
# the percentage of variance explained using a decomposition of the covariance
# operator.
univariate_expansions = [
    {"method": "UFPCA", "n_components": 15, "method_smoothing": "PS"},
    {"method": "UFPCA", "n_components": 15, "method_smoothing": "PS"},
]
mfpca_cov = MFPCA(
    n_components=3, method="covariance", univariate_expansions=univariate_expansions
)
mfpca_cov.fit(data, method_smoothing="PS")

# # Plot the eigenfunctions
_ = plot_multivariate(mfpca_cov.eigenfunctions)
plot mfpca 1d sparse

Estimate the scores – projection of the curves onto the eigenfunctions.

scores_numint = mfpca_cov.transform(data, method="NumInt")

# Plot of the scores
_ = plt.scatter(scores_numint[:, 0], scores_numint[:, 1])
plot mfpca 1d sparse

Reconstruct the curves using the scores.

data_recons_numint = mfpca_cov.inverse_transform(scores_numint)

Inner-product matrix decomposition#

Perform univariate FPCA with an estimation of the number of components by the percentage of variance explained using a decomposition of the inner-product matrix.

mfpca_innpro = MFPCA(n_components=3, method="inner-product")
mfpca_innpro.fit(data, method_smoothing="PS")

# Plot the eigenfunctions
_ = plot_multivariate(mfpca_innpro.eigenfunctions)
plot mfpca 1d sparse

Estimate the scores – projection of the curves onto the eigenfunctions – using the eigenvectors from the decomposition of the inner-product matrix.

scores_innpro = mfpca_innpro.transform(method="InnPro")

# Plot of the scores
_ = plt.scatter(scores_innpro[:, 0], scores_innpro[:, 1])
plot mfpca 1d sparse

Reconstruct the curves using the scores.

data_recons_innpro = mfpca_innpro.inverse_transform(scores_innpro)

Comparison of the methods.

colors_numint = np.array([[0.9, 0, 0, 1]])
colors_pace = np.array([[0, 0.9, 0, 1]])
colors_innpro = np.array([[0.9, 0, 0.9, 1]])

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(16, 16))
for idx_plot, idx in enumerate(np.random.choice(n_obs, 5)):
    for idx_data, (dd, dd_numint, dd_innpro) in enumerate(
        zip(kl.data.data, data_recons_numint.data, data_recons_innpro.data)
    ):
        axes[idx_plot, idx_data] = plot(
            dd[idx], ax=axes[idx_plot, idx_data], label="True"
        )
        axes[idx_plot, idx_data] = plot(
            dd_numint[idx],
            colors=colors_numint,
            ax=axes[idx_plot, idx_data],
            label="Reconstruction NumInt",
        )
        axes[idx_plot, idx_data] = plot(
            dd_innpro[idx],
            colors=colors_innpro,
            ax=axes[idx_plot, idx_data],
            label="Reconstruction InnPro",
        )
        axes[idx_plot, idx_data].legend()
plt.show()
plot mfpca 1d sparse

Total running time of the script: (0 minutes 7.242 seconds)

Gallery generated by Sphinx-Gallery