添加 init
This commit is contained in:
commit
b2c7715338
90
init
Normal file
90
init
Normal file
@ -0,0 +1,90 @@
|
||||
from copy import deepcopy
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
# Plot defaults: 16x9 figures drawn with the 'ggplot' style sheet.
plt.rcParams.update({'figure.figsize': (16, 9)})
plt.style.use('ggplot')
|
||||
|
||||
# Create the sample data set
def create_sample_data():
    """Write a synthetic three-blob data set to ``xclara.csv``.

    Seeds NumPy's global random generator with 42, draws 100 two-dimensional
    points from each of three normal distributions, stacks them in draw
    order and saves them (columns ``V1`` and ``V2``, no index column) to
    ``xclara.csv`` in the current working directory.
    """
    np.random.seed(42)

    # (centre, standard deviation) of each blob; drawn in exactly this order.
    blob_specs = (
        ([0, 0], 1),
        ([10, 5], 1.5),
        ([5, 10], 1.2),
    )
    blobs = [
        np.random.normal(loc=centre, scale=spread, size=(100, 2))
        for centre, spread in blob_specs
    ]

    frame = pd.DataFrame(np.vstack(blobs), columns=['V1', 'V2'])
    frame.to_csv('xclara.csv', index=False)
|
||||
|
||||
# Load the data set, generating it first if the file does not exist.
try:
    data = pd.read_csv('xclara.csv')
except FileNotFoundError:
    # Only a missing file triggers regeneration; any other failure
    # (parse error, permission problem, Ctrl-C) is left to propagate
    # instead of being hidden by a bare ``except``.
    create_sample_data()
    data = pd.read_csv('xclara.csv')

# The two feature columns, plus the same values as an (n_samples, 2) array.
f1 = data['V1'].values
f2 = data['V2'].values
X = np.column_stack((f1, f2))
|
||||
|
||||
# Distance function
def dist(a, b, ax=1):
    """Return the Euclidean (L2) norm of ``a - b``.

    Args:
        a: Array-like of coordinates (ndarray, list or tuple).
        b: Array-like that broadcasts against ``a``.
        ax: Axis handed to ``np.linalg.norm``. The default ``1`` yields one
            distance per row of the difference; ``None`` reduces the whole
            difference to a single scalar.

    Returns:
        The norm of the element-wise difference along ``ax``.
    """
    # Convert first so plain Python sequences can be subtracted too.
    difference = np.asarray(a) - np.asarray(b)
    return np.linalg.norm(difference, axis=ax)
|
||||
|
||||
# Number of clusters
k = 3

# Random initial centroids: each coordinate is drawn uniformly from the
# observed range of its own feature, then the pairs are stored as float32.
C_x = np.random.uniform(low=np.min(f1), high=np.max(f1), size=k)
C_y = np.random.uniform(low=np.min(f2), high=np.max(f2), size=k)
C = np.column_stack((C_x, C_y)).astype(np.float32)
|
||||
|
||||
# Plot the raw data points (black dots) and the initial centroids (red stars)
plt.scatter(x=f1, y=f2, s=7, c='black')
plt.scatter(x=C_x, y=C_y, s=200, c='red', marker='*')
plt.title("Initial Data Points and Centroids")
plt.show()
|
||||
|
||||
# Iteration state
C_old = np.zeros(C.shape)              # placeholder for the previous centroids
clusters = np.zeros(len(X))            # cluster index assigned to each point
iteration_flag = dist(C, C_old, None)  # initial distance (from the zero placeholder)
tmp = 1                                # iteration counter, starts at 1

# K-Means loop: stop when the centroids no longer move, or once the
# counter reaches 20.
while iteration_flag != 0 and tmp < 20:
    # 1. Assign every point to its nearest centroid. Broadcasting
    #    (n, 1, 2) against (k, 2) gives all point-to-centroid distances in
    #    one call; the in-place write keeps ``clusters`` the same array.
    distances = dist(X[:, np.newaxis, :], C, 2)
    clusters[:] = np.argmin(distances, axis=1)

    # 2. Remember the current centroids before moving them
    C_old = deepcopy(C)

    # 3. Move each centroid to the mean of the points assigned to it
    for i in range(k):
        # All points that belong to the current cluster
        points = X[clusters == i]
        if len(points) > 0:
            C[i] = np.mean(points, axis=0)
        else:
            # Empty cluster: re-seed the centroid uniformly inside the
            # per-feature range of the data, matching how the initial
            # centroids are drawn (one range per feature, not one global
            # min/max shared by both coordinates).
            C[i] = np.random.uniform(X.min(axis=0), X.max(axis=0))

    print(f'Iteration {tmp}')
    tmp += 1

    # 4. Measure how far the centroids moved in this iteration
    iteration_flag = dist(C, C_old, None)
    print(f'Centroid movement distance: {iteration_flag:.4f}')
|
||||
|
||||
# Plot the final clustering: one colour per cluster, centroids as black stars
colors = ['r', 'g', 'b', 'y', 'c', 'm']
fig, ax = plt.subplots()
for i in range(k):
    # Keep only the points assigned to cluster i
    members = clusters == i
    points = X[members]
    ax.scatter(x=points[:, 0], y=points[:, 1], c=colors[i], s=7)
ax.scatter(x=C[:, 0], y=C[:, 1], c='black', s=200, marker="*")
plt.title("Final Clustering Result")
plt.show()
|
Loading…
x
Reference in New Issue
Block a user