import os  # for makedirs
import homcloud.interface as hc  # HomCloud 
import homcloud.interface.codebook  # HomCloud's implementation of persistence codebook
import numpy as np  # Numerical array library
from tqdm.notebook import tqdm  # For progressbar
import matplotlib.pyplot as plt  # Plotting
import sklearn.linear_model as lm  # Machine learning
from sklearn.decomposition import PCA  # for PCA
from sklearn.model_selection import train_test_split
import pyvista as pv  # for 3D visualization

# Load the labels; they are used below to color the PCA scatter plot and as
# the targets of the logistic regression.
labels = np.loadtxt("pc/label.txt")
labels  # notebook cell output: show the loaded array

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

# Number of point-cloud files (pc/0000.txt, pc/0001.txt, ...) to process.
# Kept in one place so the loading and the diagram steps cannot drift apart.
N_POINTCLOUDS = 200

# Load every point cloud from its text file.
pointclouds = [
    np.loadtxt("pc/{:04d}.txt".format(i)) for i in tqdm(range(N_POINTCLOUDS))
]

# Compute the persistence diagrams from alpha filtrations and store one
# .pdgm file per point cloud under pd/.
os.makedirs("pd", exist_ok=True)
for i, pointcloud in enumerate(tqdm(pointclouds)):
    hc.PDList.from_alpha_filtration(
        pointcloud,
        save_boundary_map=True,
        save_to="pd/{:04d}.pdgm".format(i),
    )

# Reload the saved files and keep the diagram returned by dth_diagram(2)
# for each of them.
pds = [
    hc.PDList("pd/{:04d}.pdgm".format(i)).dth_diagram(2)
    for i in tqdm(range(N_POINTCLOUDS))
]

# Plot the histograms of two diagrams (index 0 and index 100) over the
# range (0, 0.03) with a logarithmic colorbar.
pds[0].histogram(x_range=(0, 0.03)).plot(colorbar={"type": "log"})
pds[100].histogram(x_range=(0, 0.03)).plot(colorbar={"type": "log"})

def weight(b, d):
    """Return the arctangent weight of a birth-death pair.

    The weight is ``arctan(0.01 * (d - b) ** 3)``; it depends only on the
    difference ``d - b`` and is zero when ``b == d``.

    Args:
        b: Birth value(s); a scalar or a NumPy array.
        d: Death value(s); a scalar or a NumPy array broadcastable with ``b``.

    Returns:
        The weight(s), with the broadcast shape of ``b`` and ``d``.
    """
    # np.arctan is available in every NumPy release, whereas the np.atan
    # alias only exists from NumPy 2.0 on.
    return np.arctan(0.01 * (d - b) ** 3)

# Persistence bag-of-words specification built with the weight function
# defined above.
# NOTE(review): 20 matches the number of cluster centers / vector length
# shown in the outputs; 100000 is presumably the number of sampled
# birth-death pairs -- confirm against the HomCloud documentation.
vectorize_spec = hc.codebook.PBoWSpec(100000, 20, weight)

# The codebook has to be fitted on the diagrams before it can vectorize them.
vectorize_spec.fit(pds)

# One row per persistence diagram.
pdvects = np.vstack(
    [vectorize_spec.vectorize(diagram) for diagram in pds]
)

# Fit a two-component PCA on the vectorized diagrams.
pca = PCA(n_components=2)
pca.fit(pdvects)

PCA(n_components=2)

# Project every vector onto the two principal components.
reduced = pca.transform(pdvects)

# Use the same scale on both axes.
plt.gca().set_aspect('equal')

# Samples with label 0 are drawn in red.
plt.scatter(
    reduced[labels == 0, 0],
    reduced[labels == 0, 1],
    c="r",
)
# Samples with label 1 are drawn in blue.
plt.scatter(
    reduced[labels == 1, 0],
    reduced[labels == 1, 1],
    c="b",
)

<matplotlib.collections.PathCollection at 0x7222fa8cc7d0>

# Notebook cell output: the mean_ attribute of the fitted PCA (one value per
# component of the vectorized diagrams).
pca.mean_

array([ 887.67177047, 2327.94914397, 1782.78320217,  711.00937383,
       1760.99938204,  921.26199958, 1359.12806977, 1074.28708104,
        191.30566907, 1320.59584702, 1683.88649595,  904.08721297,
       1139.17980005,  540.53777362, 1698.10817367,  394.30825652,
       1420.52850831,   99.13475996, 1722.51635129,  922.24637401])

# Notebook cell output: the two principal axes, one row per component.
pca.components_

array([[-0.16248915,  0.18149301, -0.22522264, -0.15413602,  0.56838662,
        -0.02071444,  0.37385651, -0.16734014, -0.0313471 ,  0.00667238,
         0.1222831 , -0.14655119,  0.23000033, -0.0834336 ,  0.46963424,
         0.00567342, -0.16018487, -0.0204199 ,  0.17563894,  0.00883001],
       [ 0.04624292,  0.00935448,  0.87372356, -0.05488365,  0.11254662,
        -0.07416827,  0.08494841, -0.34372708, -0.03396957, -0.13186716,
        -0.01209322, -0.2070329 ,  0.01350096, -0.06840765, -0.04161041,
        -0.07315032, -0.09228697, -0.00212709,  0.04095625, -0.06435805]])

# Notebook cell output: the codebook cluster centers, one 2-D point per row.
vectorize_spec.cluster_centers

array([[0.01031203, 0.02101732],
       [0.00655505, 0.00918908],
       [0.00705532, 0.01873634],
       [0.00835285, 0.02238782],
       [0.00715336, 0.01626907],
       [0.01539656, 0.0200287 ],
       [0.00958651, 0.0167049 ],
       [0.00819985, 0.020055  ],
       [0.01078351, 0.02477207],
       [0.01052481, 0.0189247 ],
       [0.00577619, 0.00738266],
       [0.0072798 , 0.02093751],
       [0.0123626 , 0.01745975],
       [0.01309108, 0.02256271],
       [0.00792145, 0.01768588],
       [0.01753463, 0.02380253],
       [0.00890562, 0.01884734],
       [0.00831976, 0.02494063],
       [0.00709004, 0.01106345],
       [0.01282712, 0.0196708 ]])

import matplotlib.lines

# Scatter plot of the codebook cluster centers, with a black reference line
# from (0, 0) to (0.03, 0.03).
centers = vectorize_spec.cluster_centers
diagonal = matplotlib.lines.Line2D([0, 0.03], [0, 0.03], color="k")

fig, ax = plt.subplots()
ax.scatter(centers[:, 0], centers[:, 1])
ax.set_aspect("equal")
ax.add_artist(diagonal)
ax.set_xlim(0, 0.03)
ax.set_ylim(0, 0.03)

(0.0, 0.03)

# One panel per principal component: every cluster center is colored by the
# corresponding entry of that component, on a fixed symmetric color scale.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

for k, ax in enumerate(axes):
    sc = ax.scatter(
        vectorize_spec.cluster_centers[:, 0],
        vectorize_spec.cluster_centers[:, 1],
        c=pca.components_[k, :],
        cmap='RdBu',
        vmin=-0.7,
        vmax=0.7,
    )
    cbar = fig.colorbar(sc, ax=ax)

    # Same framing as the other cluster-center plots: equal aspect, black
    # reference diagonal, and both axes limited to [0, 0.03].
    ax.grid(True)
    ax.set_aspect("equal")
    diagonal = matplotlib.lines.Line2D([0, 0.03], [0, 0.03], color="k")
    ax.add_artist(diagonal)
    ax.set_xlim(0, 0.03)
    ax.set_ylim(0, 0.03)

# Show the figure
plt.show()

# Hold out 25% of the samples as a test set.
# NOTE(review): no random_state is passed, so the split -- and every score or
# coefficient derived from it -- presumably changes between runs; consider
# fixing a seed if reproducible numbers are needed.
(pdvects_train, pdvects_test, labels_train, labels_test) = train_test_split(
    pdvects,
    labels,
    test_size=0.25,
)

# Logistic regression with regularization parameter C=0.01.
model = lm.LogisticRegression(solver="lbfgs", C=0.01)

# Train on the training part only.
model.fit(pdvects_train, labels_train)

LogisticRegression(C=0.01)

# Notebook cell output: score of the fitted model on the held-out test set.
model.score(pdvects_test, labels_test)

0.98

# Notebook cell output: the learned coefficients, one per vector component.
model.coef_

array([[ 0.00063404, -0.00383811,  0.0018775 ,  0.0009852 , -0.00659765,
         0.00772874, -0.00355251,  0.00132356,  0.00113951,  0.00233676,
        -0.00040768,  0.00261072, -0.00099554,  0.00234868, -0.00199369,
         0.00429888,  0.00183706,  0.00297109, -0.00273338,  0.0037768 ]])

# Notebook cell output: largest and smallest coefficient, used to pick the
# color range of the coefficient plot.
model.coef_.max(), model.coef_.min()

(np.float64(0.007728736540741548), np.float64(-0.006597652991082844))

# Color every codebook cluster center by its logistic-regression coefficient.
fig, ax = plt.subplots()

sc = ax.scatter(
    vectorize_spec.cluster_centers[:, 0], 
    vectorize_spec.cluster_centers[:, 1],
    # coef_ is a 2-D array with a single row; flatten it so that `c` is an
    # explicit one-value-per-point sequence instead of relying on matplotlib
    # to reinterpret a 2-D array of matching size.
    c=model.coef_.ravel(),
    vmax=0.01,
    vmin=-0.01,
    cmap='RdBu'
)

cbar = fig.colorbar(sc, ax=ax)
ax.grid(True)
ax.set_aspect("equal")
# Black reference line from (0, 0) to (0.03, 0.03).
ax.add_artist(matplotlib.lines.Line2D([0, 0.03], [0, 0.03], color="k"))
ax.set_xlim(0, 0.03)
ax.set_ylim(0, 0.03)

# Show the figure
plt.show()

# Notebook cell output: element-wise product of sample 1's vector and the
# model coefficients.
pdvects[1, :] * model.coef_

array([[ 0.00000000e+00, -7.23058444e+00,  9.23286504e+00,
         0.00000000e+00, -2.15614357e+01,  3.58966155e+00,
        -1.09179277e+01,  0.00000000e+00,  0.00000000e+00,
         9.38370097e+00, -6.63908603e-01,  0.00000000e+00,
        -1.14285909e+00,  0.00000000e+00, -7.92658909e+00,
         1.08440045e-06,  0.00000000e+00,  0.00000000e+00,
        -4.39493796e+00,  9.52437259e+00]])

# Notebook cell output: element-wise product of sample 101's vector and the
# model coefficients.
pdvects[101, :] * model.coef_

array([[ 1.09132177e+00, -3.52691356e+00,  3.91866593e+00,
         0.00000000e+00, -0.00000000e+00,  4.31452755e+00,
        -0.00000000e+00,  4.00605888e+00,  0.00000000e+00,
         0.00000000e+00, -3.10805748e-01,  0.00000000e+00,
        -2.68996219e-02,  0.00000000e+00, -4.75517715e+00,
         9.24152387e-04,  6.28288138e+00,  0.00000000e+00,
        -2.40194137e+00,  0.00000000e+00]])

# For two samples, show on the left the cluster centers colored by
# (vector component * model coefficient) and on the right the histogram of
# the sample's persistence diagram.
# Each entry is (sample index, symmetric color limit of the left panel).
for sample_index, color_limit in ((1, 30), (101, 10)):
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    sc = axes[0].scatter(
        vectorize_spec.cluster_centers[:, 0],
        vectorize_spec.cluster_centers[:, 1],
        # The product with coef_ is a 2-D array with a single row; flatten it
        # so that `c` is an explicit one-value-per-point sequence instead of
        # relying on matplotlib to reinterpret a 2-D array of matching size.
        c=(pdvects[sample_index, :] * model.coef_).ravel(),
        vmax=color_limit,
        vmin=-color_limit,
        cmap='RdBu'
    )

    cbar = fig.colorbar(sc, ax=axes[0])
    axes[0].grid(True)
    axes[0].set_aspect("equal")
    # Black reference line from (0, 0) to (0.03, 0.03).
    axes[0].add_artist(matplotlib.lines.Line2D([0, 0.03], [0, 0.03], color="k"))
    axes[0].set_xlim(0, 0.03)
    axes[0].set_ylim(0, 0.03)
    pds[sample_index].histogram((0, 0.03), 128).plot(colorbar={"type": "log"}, ax=axes[1])
    fig.tight_layout()
    # Show the figure
    plt.show()

	n_components	2
	copy	True
	whiten	False
	svd_solver	'auto'
	tol	0.0
	iterated_power	'auto'
	n_oversamples	10
	power_iteration_normalizer	'auto'
	random_state	None

	penalty	'l2'
	dual	False
	tol	0.0001
	C	0.01
	fit_intercept	True
	intercept_scaling	1
	class_weight	None
	random_state	None
	solver	'lbfgs'
	max_iter	100
	multi_class	'deprecated'
	verbose	0
	warm_start	False
	n_jobs	None
	l1_ratio	None

Persistence Codebook を使った機械学習

ポイントクラウドデータの読み込み

パーシステンス図の計算

Persistence Codebookによるベクトル化

主成分分析（PCA）

ロジスティック回帰