更新 k-means2
This commit is contained in:
		
							
								
								
									
										95
									
								
								k-means2
									
									
									
									
									
								
							
							
						
						
									
										95
									
								
								k-means2
									
									
									
									
									
								
							| @@ -1,13 +1,34 @@ | |||||||
| from copy import deepcopy |  | ||||||
| import numpy as np | import numpy as np | ||||||
| import pandas as pd | import pandas as pd | ||||||
| from matplotlib import pyplot as plt | from matplotlib import pyplot as plt | ||||||
| plt.rcParams['figure.figsize'] = (16,9) | from copy import deepcopy | ||||||
|  |  | ||||||
|  | # 设置图形样式 | ||||||
|  | plt.rcParams['figure.figsize'] = (16, 9) | ||||||
| plt.style.use('ggplot') | plt.style.use('ggplot') | ||||||
|  |  | ||||||
|  | # 创建示例数据并保存为CSV文件 | ||||||
|  | def create_sample_data(): | ||||||
|  |     # 生成三个簇的示例数据 | ||||||
|  |     np.random.seed(42) | ||||||
|  |     cluster1 = np.random.normal(loc=[0, 0], scale=1, size=(100, 2)) | ||||||
|  |     cluster2 = np.random.normal(loc=[10, 5], scale=1.5, size=(100, 2)) | ||||||
|  |     cluster3 = np.random.normal(loc=[5, 10], scale=1.2, size=(100, 2)) | ||||||
|  |     data = np.vstack([cluster1, cluster2, cluster3]) | ||||||
|  |      | ||||||
|  |     # 创建DataFrame并保存为CSV | ||||||
|  |     df = pd.DataFrame(data, columns=['V1', 'V2']) | ||||||
|  |     df.to_csv('xclara.csv', index=False) | ||||||
|  |  | ||||||
|  | # 生成示例CSV文件(注意:会覆盖当前目录下已有的 xclara.csv) | ||||||
|  | create_sample_data() | ||||||
|  |  | ||||||
|  | # 从CSV读取数据 | ||||||
| data = pd.read_csv('xclara.csv') | data = pd.read_csv('xclara.csv') | ||||||
| f1 = data['V1'].values | f1 = data['V1'].values | ||||||
| f2 = data['V2'].values | f2 = data['V2'].values | ||||||
| X = np.array(list(zip(f1, f2))) | X = np.array(list(zip(f1, f2))) | ||||||
|  |  | ||||||
| # 距离计算函数 | # 距离计算函数 | ||||||
| def dist(a, b, ax=1): | def dist(a, b, ax=1): | ||||||
|     return np.linalg.norm(a - b, axis=ax) |     return np.linalg.norm(a - b, axis=ax) | ||||||
| @@ -16,33 +37,49 @@ def dist(a, b, ax=1): | |||||||
| k = 3 | k = 3 | ||||||
|  |  | ||||||
| # 随机初始化质心(修正:使用数据范围) | # 随机初始化质心(修正:使用数据范围) | ||||||
| C_x = np.random.randint(0,np.max(X)-20, size=k) | C_x = np.random.uniform(np.min(f1), np.max(f1), size=k) | ||||||
| C_y = np.random.randint(0,np.max(X)-20, size=k) | C_y = np.random.uniform(np.min(f2), np.max(f2), size=k) | ||||||
| C = np.array(list(zip(C_x, C_y)), dtype=np.float32) | C = np.array(list(zip(C_x, C_y)), dtype=np.float32) | ||||||
|  |  | ||||||
| C_old = np.zeros(C.shape) | # 绘制初始数据点和质心(修正颜色拼写错误) | ||||||
| print(C) | plt.scatter(f1, f2, c='black', s=7)  # 修正:'balck' -> 'black' | ||||||
| clusters = np.zeros(len(X)) | plt.scatter(C_x, C_y, marker='*', s=200, c='red') | ||||||
| iteration_flag = dist(C,C_old,1) | plt.title("Initial Data Points and Centroids") | ||||||
| tmp = 1 |  | ||||||
| while iteration_flag.any() != 0 and tmp<20: |  | ||||||
|     for i in range(len(X)): |  | ||||||
|         distances = dist(X[i],C,1) |  | ||||||
|         clusters[i] = clusters |  | ||||||
|     C_old = deepcopy(C) |  | ||||||
|     for i in range(C): |  | ||||||
|         points = [X[j] for j in range(len(X)) if clusters[j] == i] |  | ||||||
|         C[i] = np.mean(points,axis=0) |  | ||||||
|  |  | ||||||
|          |  | ||||||
|     print('%d'%tmp) |  | ||||||
|     tmp = tmp + 1 |  | ||||||
|     iteraction_flag = dist(C,C_old,1) |  | ||||||
|     print('distance:',iteraction_flag) |  | ||||||
| colors = ['r','g','b','y','c','m'] |  | ||||||
| fig,ax = plt.subplots() |  | ||||||
| for i in range(k): |  | ||||||
|     points = np.array([X[j] for j in range(len(X) if clusters[j] == i)]) |  | ||||||
|     ax.scatter(points[:,0],points[:,1],s=7,c=colors[i]) |  | ||||||
| ax.scatter(C[:,0],C[:,1],marker="*",s=200,c='black') |  | ||||||
| plt.show() | plt.show() | ||||||
|  |  | ||||||
|  | # ---- 可选:添加完整的K-Means算法实现 ---- | ||||||
|  | # 将旧质心初始化为全零,仅用于计算首次迭代前的质心移动量 | ||||||
|  | C_old = np.zeros(C.shape) | ||||||
|  | clusters = np.zeros(len(X)) | ||||||
|  | error = dist(C, C_old, None) | ||||||
|  |  | ||||||
|  | # K-Means迭代 | ||||||
|  | while error != 0: | ||||||
|  |     # 分配点到最近质心 | ||||||
|  |     for i in range(len(X)): | ||||||
|  |         distances = dist(X[i], C) | ||||||
|  |         cluster = np.argmin(distances) | ||||||
|  |         clusters[i] = cluster | ||||||
|  |      | ||||||
|  |     # 保存旧质心 | ||||||
|  |     C_old = deepcopy(C) | ||||||
|  |      | ||||||
|  |     # 计算新质心 | ||||||
|  |     for i in range(k): | ||||||
|  |         points = [X[j] for j in range(len(X)) if clusters[j] == i] | ||||||
|  |         if points: | ||||||
|  |             C[i] = np.mean(points, axis=0) | ||||||
|  |      | ||||||
|  |     # 计算质心移动距离 | ||||||
|  |     error = dist(C, C_old, None) | ||||||
|  |  | ||||||
|  | # 绘制最终聚类结果 | ||||||
|  | colors = ['r', 'g', 'b', 'c', 'm', 'y'] | ||||||
|  | fig, ax = plt.subplots() | ||||||
|  | for i in range(k): | ||||||
|  |     points = np.array([X[j] for j in range(len(X)) if clusters[j] == i]) | ||||||
|  |     ax.scatter(points[:, 0], points[:, 1], s=7, c=colors[i]) | ||||||
|  | ax.scatter(C[:, 0], C[:, 1], marker='*', s=200, c='black') | ||||||
|  | plt.title("Final Clustering Result") | ||||||
|  | plt.show() | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user