更新 k-means2
This commit is contained in:
		
							
								
								
									
										95
									
								
								k-means2
									
									
									
									
									
								
							
							
						
						
									
										95
									
								
								k-means2
									
									
									
									
									
								
							| @@ -1,13 +1,34 @@ | ||||
| from copy import deepcopy | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
| from matplotlib import pyplot as plt | ||||
| plt.rcParams['figure.figsize'] = (16,9) | ||||
| from copy import deepcopy | ||||
|  | ||||
| # Configure plotting: default figure size (16, 9) and the 'ggplot' style. | ||||
| plt.rcParams['figure.figsize'] = (16, 9) | ||||
| plt.style.use('ggplot') | ||||
|  | ||||
| # Build sample data and save it as a CSV file. | ||||
| def create_sample_data(): | ||||
|     """Generate 300 two-column sample points and write them to 'xclara.csv'. | ||||
|  | ||||
|     Seeds NumPy's global random generator with 42, draws three groups of | ||||
|     100 points each from normal distributions with different centres and | ||||
|     spreads, stacks them, and saves the result with columns 'V1' and 'V2' | ||||
|     (no index column). Takes no arguments and returns None. | ||||
|     """ | ||||
|     # Generate sample data for three clusters. | ||||
|     np.random.seed(42) | ||||
|     cluster1 = np.random.normal(loc=[0, 0], scale=1, size=(100, 2)) | ||||
|     cluster2 = np.random.normal(loc=[10, 5], scale=1.5, size=(100, 2)) | ||||
|     cluster3 = np.random.normal(loc=[5, 10], scale=1.2, size=(100, 2)) | ||||
|     data = np.vstack([cluster1, cluster2, cluster3]) | ||||
|      | ||||
|     # Build a DataFrame and save it as CSV. | ||||
|     df = pd.DataFrame(data, columns=['V1', 'V2']) | ||||
|     df.to_csv('xclara.csv', index=False) | ||||
|  | ||||
| # Create the sample CSV file (overwrites 'xclara.csv' in the working directory). | ||||
| create_sample_data() | ||||
|  | ||||
| # Read the data back from the CSV. | ||||
| data = pd.read_csv('xclara.csv') | ||||
| f1 = data['V1'].values | ||||
| f2 = data['V2'].values | ||||
| # Pair the two columns into one array with a (V1, V2) row per sample. | ||||
| X = np.array(list(zip(f1, f2))) | ||||
|  | ||||
| # Distance helper. | ||||
| def dist(a, b, ax=1): | ||||
|     """Return the norm of the difference ``a - b``. | ||||
|  | ||||
|     Args: | ||||
|         a, b: arrays (or broadcast-compatible values) to subtract. | ||||
|         ax: axis passed to ``np.linalg.norm``; defaults to 1. Passing | ||||
|             ``None`` reduces over all elements and yields a single value. | ||||
|  | ||||
|     Returns: | ||||
|         Whatever ``np.linalg.norm(a - b, axis=ax)`` returns. | ||||
|     """ | ||||
|     return np.linalg.norm(a - b, axis=ax) | ||||
| @@ -16,33 +37,49 @@ def dist(a, b, ax=1): | ||||
| # Number of clusters (and centroids). | ||||
| k = 3 | ||||
|  | ||||
| # Randomly initialise the centroids (fix: use the data range). | ||||
| # NOTE(review): C_x / C_y are assigned twice below. The randint pair looks | ||||
| # like the removed side of this diff and the uniform pair like the added | ||||
| # side; read top to bottom, the uniform values are the ones that remain. | ||||
| C_x = np.random.randint(0,np.max(X)-20, size=k) | ||||
| C_y = np.random.randint(0,np.max(X)-20, size=k) | ||||
| C_x = np.random.uniform(np.min(f1), np.max(f1), size=k) | ||||
| C_y = np.random.uniform(np.min(f2), np.max(f2), size=k) | ||||
| # Pair the coordinates into a float32 array with one (x, y) row per centroid. | ||||
| C = np.array(list(zip(C_x, C_y)), dtype=np.float32) | ||||
|  | ||||
| # NOTE(review): this run of lines appears to be the earlier version of the | ||||
| # clustering loop and plot that this commit replaces (a rewritten loop | ||||
| # follows further down). The notes below flag problems visible in it. | ||||
| C_old = np.zeros(C.shape) | ||||
| print(C) | ||||
| clusters = np.zeros(len(X)) | ||||
| iteration_flag = dist(C,C_old,1) | ||||
| tmp = 1 | ||||
| # Loop while any centroid distance is non-zero, capped by the tmp counter. | ||||
| while iteration_flag.any() != 0 and tmp<20: | ||||
|     for i in range(len(X)): | ||||
|         distances = dist(X[i],C,1) | ||||
|         # NOTE(review): assigns the whole `clusters` array to one element; | ||||
|         # `distances` is computed but never used. | ||||
|         clusters[i] = clusters | ||||
|     C_old = deepcopy(C) | ||||
|     # NOTE(review): `C` is an ndarray here, so range(C) is not a valid count. | ||||
|     for i in range(C): | ||||
|         points = [X[j] for j in range(len(X)) if clusters[j] == i] | ||||
|         C[i] = np.mean(points,axis=0) | ||||
|  | ||||
|          | ||||
|     print('%d'%tmp) | ||||
|     tmp = tmp + 1 | ||||
|     # NOTE(review): writes `iteraction_flag`, not the `iteration_flag` tested | ||||
|     # by the while condition, so the condition variable is never updated. | ||||
|     iteraction_flag = dist(C,C_old,1) | ||||
|     print('distance:',iteraction_flag) | ||||
| colors = ['r','g','b','y','c','m'] | ||||
| fig,ax = plt.subplots() | ||||
| for i in range(k): | ||||
|     # NOTE(review): the `if` sits inside range(...) with no `else`, which is | ||||
|     # not valid syntax; the closing parenthesis of len(X) looks misplaced. | ||||
|     points = np.array([X[j] for j in range(len(X) if clusters[j] == i)]) | ||||
|     ax.scatter(points[:,0],points[:,1],s=7,c=colors[i]) | ||||
| ax.scatter(C[:,0],C[:,1],marker="*",s=200,c='black') | ||||
| # Plot the initial data points and centroids (fixes a colour-name typo). | ||||
| plt.scatter(f1, f2, c='black', s=7)  # fix: 'balck' -> 'black' | ||||
| plt.scatter(C_x, C_y, marker='*', s=200, c='red') | ||||
| plt.title("Initial Data Points and Centroids") | ||||
| plt.show() | ||||
|  | ||||
| # ---- Optional: full K-Means implementation ---- | ||||
| # Previous-iteration centroids start as zeros; every point starts with label 0. | ||||
| C_old = np.zeros(C.shape) | ||||
| clusters = np.zeros(len(X)) | ||||
| # axis=None collapses the centroid movement into a single scalar. | ||||
| error = dist(C, C_old, None) | ||||
|  | ||||
| # K-Means iteration: repeat until the centroids stop moving (error is exactly 0). | ||||
| # NOTE(review): there is no iteration cap on this loop. | ||||
| while error != 0: | ||||
|     # Assign each point to its nearest centroid. | ||||
|     for i in range(len(X)): | ||||
|         distances = dist(X[i], C) | ||||
|         cluster = np.argmin(distances) | ||||
|         clusters[i] = cluster | ||||
|      | ||||
|     # Keep a copy of the old centroids. | ||||
|     C_old = deepcopy(C) | ||||
|      | ||||
|     # Compute the new centroids. | ||||
|     for i in range(k): | ||||
|         points = [X[j] for j in range(len(X)) if clusters[j] == i] | ||||
|         # A cluster with no assigned points keeps its previous centroid. | ||||
|         if points: | ||||
|             C[i] = np.mean(points, axis=0) | ||||
|      | ||||
|     # Measure how far the centroids moved in this iteration. | ||||
|     error = dist(C, C_old, None) | ||||
|  | ||||
| # Plot the final clustering result: one colour per cluster, centroids as black stars. | ||||
| colors = ['r', 'g', 'b', 'c', 'm', 'y'] | ||||
| fig, ax = plt.subplots() | ||||
| for i in range(k): | ||||
|     # NOTE(review): if a cluster ends up empty, `points` is not 2-D and the | ||||
|     # column indexing on the next line would fail — confirm this cannot happen. | ||||
|     points = np.array([X[j] for j in range(len(X)) if clusters[j] == i]) | ||||
|     ax.scatter(points[:, 0], points[:, 1], s=7, c=colors[i]) | ||||
| ax.scatter(C[:, 0], C[:, 1], marker='*', s=200, c='black') | ||||
| plt.title("Final Clustering Result") | ||||
| plt.show() | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user