Note
Go to the end to download the full example code.
MFPCA of 1-dimensional sparse data#
Example of multivariate functional principal components analysis of 1-dimensional data.
# Author: Steven Golovkine <steven_golovkine@icloud.com>
# License: MIT
# Load packages
import matplotlib.pyplot as plt
import numpy as np
from FDApy.representation import DenseArgvals
from FDApy.simulation import KarhunenLoeve
from FDApy.preprocessing import MFPCA
from FDApy.visualization import plot, plot_multivariate
# Set general parameters
rng = 42
n_obs = 50
idx = 5
colors = np.array([[0.5, 0, 0, 1]])
# Parameters of the basis
name = ["bsplines", "fourier"]
n_functions = [5, 5]
argvals = [
DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}),
DenseArgvals({"input_dim_0": np.linspace(0, 1, 101)}),
]
We simulate \(N = 50\) curves of a 2-dimensional process. The first component of the process is defined on the one-dimensional observation grid \(\{0, 0.01, 0.02, \cdots, 1\}\), based on the first \(K = 5\) B-splines basis functions on \([0, 1]\) and the variance of the scores random variables equal to \(1\). The second component of the process is defined on the one-dimensional observation grid \(\{0, 0.01, 0.02, \cdots, 1\}\), based on the first \(K = 5\) Fourier basis functions on \([0, 1]\) and the variance of the scores random variables equal to \(1\).
kl = KarhunenLoeve(
basis_name=name, n_functions=n_functions, argvals=argvals, random_state=rng
)
kl.new(n_obs=n_obs)
kl.add_noise_and_sparsify(noise_variance=0.05, percentage=0.5, epsilon=0.05)
data = kl.sparse_data
_ = plot_multivariate(data)

Covariance decomposition#
# Perform multivariate FPCA with an estimation of the number of components by
# the percentage of variance explained using a decomposition of the covariance
# operator.
univariate_expansions = [
{"method": "UFPCA", "n_components": 15, "method_smoothing": "PS"},
{"method": "UFPCA", "n_components": 15, "method_smoothing": "PS"},
]
mfpca_cov = MFPCA(
n_components=3, method="covariance", univariate_expansions=univariate_expansions
)
mfpca_cov.fit(data, method_smoothing="PS")
# # Plot the eigenfunctions
_ = plot_multivariate(mfpca_cov.eigenfunctions)

Estimate the scores – projection of the curves onto the eigenfunctions.
scores_numint = mfpca_cov.transform(data, method="NumInt")
# Plot of the scores
_ = plt.scatter(scores_numint[:, 0], scores_numint[:, 1])

Reconstruct the curves using the scores.
data_recons_numint = mfpca_cov.inverse_transform(scores_numint)
Inner-product matrix decomposition#
Perform univariate FPCA with an estimation of the number of components by the percentage of variance explained using a decomposition of the inner-product matrix.
mfpca_innpro = MFPCA(n_components=3, method="inner-product")
mfpca_innpro.fit(data, method_smoothing="PS")
# Plot the eigenfunctions
_ = plot_multivariate(mfpca_innpro.eigenfunctions)

Estimate the scores – projection of the curves onto the eigenfunctions – using the eigenvectors from the decomposition of the inner-product matrix.
scores_innpro = mfpca_innpro.transform(method="InnPro")
# Plot of the scores
_ = plt.scatter(scores_innpro[:, 0], scores_innpro[:, 1])

Reconstruct the curves using the scores.
data_recons_innpro = mfpca_innpro.inverse_transform(scores_innpro)
Comparison of the methods.
colors_numint = np.array([[0.9, 0, 0, 1]])
colors_pace = np.array([[0, 0.9, 0, 1]])
colors_innpro = np.array([[0.9, 0, 0.9, 1]])
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(16, 16))
for idx_plot, idx in enumerate(np.random.choice(n_obs, 5)):
for idx_data, (dd, dd_numint, dd_innpro) in enumerate(
zip(kl.data.data, data_recons_numint.data, data_recons_innpro.data)
):
axes[idx_plot, idx_data] = plot(
dd[idx], ax=axes[idx_plot, idx_data], label="True"
)
axes[idx_plot, idx_data] = plot(
dd_numint[idx],
colors=colors_numint,
ax=axes[idx_plot, idx_data],
label="Reconstruction NumInt",
)
axes[idx_plot, idx_data] = plot(
dd_innpro[idx],
colors=colors_innpro,
ax=axes[idx_plot, idx_data],
label="Reconstruction InnPro",
)
axes[idx_plot, idx_data].legend()
plt.show()

Total running time of the script: (0 minutes 7.242 seconds)