# path: root/BDA/tp_red_dim.py
# blob: c23b95d77b71c71c990fdd7eaf9db0871b5de8f0 (plain)
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.datasets import make_moons
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# --- PCA on the decathlon data set -------------------------------------
# Load the tab-separated table and drop the three columns that are not fed
# to the PCA ("Points", "Rank", "Competition").
df = pd.read_csv("decathlon.txt", sep='\t')
feature_matrix = df.drop(columns=["Points", "Rank", "Competition"]).to_numpy()

# Standardize every remaining column before the decomposition.
scaler = StandardScaler()
standardized = scaler.fit_transform(feature_matrix)

# Fit a two-component PCA and report the share of variance each one explains.
pca = PCA(n_components=2).fit(standardized)
print(pca.explained_variance_ratio_)

# Coordinates of every row in the fitted 2-D principal-component space.
projected = pca.transform(standardized)
print(projected)

# Optional plot of the projected rows, coloured by the "Rank" column:
#plt.scatter(x=projected[:,0], y=projected[:,1], c=df["Rank"])
#plt.show()

# Rows of `loadings` are the principal axes expressed in the original
# (standardized) feature space.
loadings = pca.components_
print(loadings)
# Optional plot of the features against the two principal axes:
#plt.scatter(x=loadings[0], y=loadings[1])
#plt.show()

print("===============================================")

# --- t-SNE on a synthetic "two moons" data set -------------------------
# 200 noisy 2-D points; `y` holds the generated labels and is not used below.
X, y = make_moons(n_samples=200, noise=0.1)
# Optional plot of the raw points:
#plt.scatter(X[:,0], X[:,1])
#plt.show()

# Standardize with a dedicated scaler so this section neither depends on nor
# refits the scaler used for the decathlon data above.
scaled = StandardScaler().fit_transform(X)

# Embed the *standardized* points into one dimension. The previous version
# computed `scaled` but then passed the raw `X` to t-SNE, which made the
# standardization step dead code.
X_embedded = TSNE(n_components=1).fit_transform(scaled)
print(X_embedded)