# Worked example from the machine-learning "watermelon book" (机器学习西瓜书上的例题)
import random as ra
import math
# k-means clustering of the worked example on p. 205 of the "watermelon book".
# Each sample is [density, sugar content]; the number of clusters is simply
# the number of initial centers listed in ``Prim``.

# Chinese numerals used in the printed cluster headings.
_CN_DIGITS = '一二三四五六七八九十'


def cluster_label(index):
    """Return the printed heading for the cluster at 0-based ``index``.

    The first ten clusters use Chinese numerals; later ones fall back to
    Arabic digits.
    """
    if index < len(_CN_DIGITS):
        return '第%s类' % _CN_DIGITS[index]
    return '第%d类' % (index + 1)


def nearest_center(sample, centers):
    """Return the index of the center closest to ``sample``.

    Distance is Euclidean over all coordinates. When several centers are
    equally close the highest index wins; for two centers this is the same
    rule as a plain ``dist[0] < dist[1]`` test (a tie goes to the second).
    """
    best_index = 0
    best_dist = math.inf
    for index, center in enumerate(centers):
        dist = math.sqrt(sum((a - b) ** 2 for a, b in zip(sample, center)))
        # ``<=`` (not ``<``) so that a later center wins a tie.
        if dist <= best_dist:
            best_index, best_dist = index, dist
    return best_index


def assign_samples(samples, centers):
    """Partition the samples by nearest center.

    Returns one list per center holding the 0-based indices (into
    ``samples``) of the members of that cluster. Indices are stored rather
    than the points themselves so duplicate samples stay distinguishable.

    Raises:
        ValueError: if ``centers`` is empty.
    """
    if not centers:
        raise ValueError('at least one cluster center is required')
    clusters = [[] for _ in centers]
    for index, sample in enumerate(samples):
        clusters[nearest_center(sample, centers)].append(index)
    return clusters


def update_centers(samples, clusters, centers, ndigits=3):
    """Return the new mean vector of every cluster.

    Only the members of a cluster contribute to its mean; the previous
    center is not counted as a sample. A cluster without members keeps its
    previous center. Coordinates are rounded to ``ndigits`` decimals.
    The input lists are left unmodified.
    """
    new_centers = []
    for members, old_center in zip(clusters, centers):
        if not members:
            # Nothing to average; avoid dividing by zero.
            new_centers.append(list(old_center))
            continue
        columns = zip(*(samples[i] for i in members))
        new_centers.append(
            [round(sum(column) / len(members), ndigits) for column in columns]
        )
    return new_centers


# All samples: [density, sugar content].
D = [[0.697, 0.460], [0.774, 0.376], [0.634, 0.264], [0.608, 0.318],
     [0.556, 0.215], [0.403, 0.237], [0.481, 0.149], [0.437, 0.211],
     [0.666, 0.091], [0.243, 0.267]]
# Initial cluster centers, picked from the samples (k = 2 here).
Prim = [[0.634, 0.264], [0.403, 0.237]]
# Number of iterations to run.
Total = 4

for count in range(1, Total + 1):
    # C[i] holds the 0-based indices of the samples assigned to cluster i.
    C = assign_samples(D, Prim)
    # Recompute the centers once every sample has been assigned.
    Prim = update_centers(D, C, Prim)

    # Report the partition (1-based sample numbers) and the mean vectors.
    print('第%d轮 当前迭代的当前簇划分' % count)
    for i, members in enumerate(C):
        # Member numbers are printed without a newline, so every heading
        # after the first has to start a fresh line itself.
        print(cluster_label(i) if i == 0 else '\n' + cluster_label(i))
        for index in members:
            print('编号', index + 1, end=' ')
    print('\n均值向量为')
    summary = []
    for i, center in enumerate(Prim):
        summary += [cluster_label(i) + ' ', center]
    print(*summary, end='\n==============\n')