summaryrefslogtreecommitdiff
path: root/BDA/tp_red_dim.py
diff options
context:
space:
mode:
authorMarcellus <msimon_fr@hotmail.com>2026-03-09 14:46:14 +0100
committerMarcellus <msimon_fr@hotmail.com>2026-03-09 14:46:14 +0100
commitf2e9fecc8d42913e5a32e06bc3a77f0147736b41 (patch)
treea2ed43bf59a758392407aa18dfca94de34a59b17 /BDA/tp_red_dim.py
parent5ba18af15a8d18d9ec8e576afc72a30129f26dcd (diff)
feat: BDA 09-03
Diffstat (limited to 'BDA/tp_red_dim.py')
-rw-r--r--BDA/tp_red_dim.py35
1 files changed, 35 insertions, 0 deletions
diff --git a/BDA/tp_red_dim.py b/BDA/tp_red_dim.py
new file mode 100644
index 0000000..c23b95d
--- /dev/null
+++ b/BDA/tp_red_dim.py
@@ -0,0 +1,35 @@
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from sklearn.decomposition import PCA
+from sklearn.datasets import make_moons
+from sklearn.manifold import TSNE
+import matplotlib.pyplot as plt
+
+df = pd.read_csv("decathlon.txt", sep='\t')  # tab-separated input file
+clean = df.drop(["Points", "Rank", "Competition"], axis="columns")
+mat = clean.to_numpy()
+scaler = StandardScaler()
+normalized = scaler.fit_transform(mat)  # standardize each column before PCA
+pca = PCA(n_components=2)
+pca.fit(normalized)
+print(pca.explained_variance_ratio_)  # share of variance kept by each axis
+
+reduction = pca.transform(normalized)  # rows projected onto the two axes
+print(reduction)
+# Optional plot: projected rows, coloured by the "Rank" column.
+#plt.scatter(x=reduction[:,0], y=reduction[:,1], c=df["Rank"])
+#plt.show()
+components = pca.components_  # one row of per-column loadings per axis
+print(components)
+#plt.scatter(x=components[0], y=components[1])
+#plt.show()
+
+print("===============================================")
+# Second part: 1-D t-SNE embedding of the two-moons toy dataset.
+X, y = make_moons(n_samples=200, noise=0.1)
+#plt.scatter(X[:,0], X[:,1])
+#plt.show()
+# t-SNE works on pairwise distances, so embed the standardized points, not raw X.
+scaled = scaler.fit_transform(X)  # refits the shared scaler on the moons data
+X_embedded = TSNE(n_components=1).fit_transform(scaled)
+print(X_embedded)