tensorflow 基础模型应用3 —— k-means 【转】

浏览: 981

1.create样本

import tensorflow as tf
import numpy as np


def create_samples(n_clusters, n_samples_per_cluster, n_features, embiggen_factor, seed):
np.random.seed(seed)
slices = []
centroids = []
# Create samples for each cluster
for i in range(n_clusters):
samples = tf.random_normal((n_samples_per_cluster, n_features),
mean=0.0, stddev=5.0, dtype=tf.float32, seed=seed, name="cluster_{}".format(i))
current_centroid = (np.random.random((1, n_features)) * embiggen_factor) - (embiggen_factor/2)
centroids.append(current_centroid)
samples += current_centroid
slices.append(samples)
# Create a big "samples" dataset
samples = tf.concat(0, slices, name='samples')
centroids = tf.concat(0, centroids, name='centroids')
return centroids, samples

2.generate样本

import tensorflow as tf
import numpy as np

from functions import create_samples

n_features = 2
n_clusters = 3
n_samples_per_cluster = 500
seed = 700
embiggen_factor = 70

np.random.seed(seed)

centroids, samples = create_samples(n_clusters, n_samples_per_cluster, n_features, embiggen_factor, seed)

model = tf.initialize_all_variables()
with tf.Session() as session:
sample_values = session.run(samples)
centroid_values = session.run(centroids)

3.visualise the results

def plot_clusters(all_samples, centroids, n_samples_per_cluster):
import matplotlib.pyplot as plt
# Plot out the different clusters
# Choose a different colour for each cluster
colour = plt.cm.rainbow(np.linspace(0,1,len(centroids)))
for i, centroid in enumerate(centroids):
# Grab just the samples fpr the given cluster and plot them out with a new colour
samples = all_samples[i*n_samples_per_cluster:(i+1)*n_samples_per_cluster]
plt.scatter(samples[:,0], samples[:,1], c=colour[i])
# Also plot centroid
plt.plot(centroid[0], centroid[1], markersize=35, marker="x", color='k', mew=10)
plt.plot(centroid[0], centroid[1], markersize=30, marker="x", color='m', mew=5)
plt.show()

plot_clusters(sample_values, centroid_values, n_samples_per_cluster)

4.Initialisation

def choose_random_centroids(samples, n_clusters):
# Step 0: Initialisation: Select `n_clusters` number of random points
n_samples = tf.shape(samples)[0]
random_indices = tf.random_shuffle(tf.range(0, n_samples))
begin = [0,]
size = [n_clusters,]
size[0] = n_clusters
centroid_indices = tf.slice(random_indices, begin, size)
initial_centroids = tf.gather(samples, centroid_indices)
return initial_centroids

n_features = 2
n_clusters = 3
n_samples_per_cluster = 500
seed = 700
embiggen_factor = 70

centroids, samples = create_samples(n_clusters, n_samples_per_cluster, n_features, embiggen_factor, seed)
initial_centroids = choose_random_centroids(samples, n_clusters)

model = tf.initialize_all_variables()
with tf.Session() as session:
sample_values = session.run(samples)
updated_centroid_value = session.run(initial_centroids)

plot_clusters(sample_values, updated_centroid_value, n_samples_per_cluster)

5.Updating Centroids

def assign_to_nearest(samples, centroids):
# Finds the nearest centroid for each sample

# START from http://esciencegroup.com/2016/01/05/an-encounter-with-googles-tensorflow/
expanded_vectors = tf.expand_dims(samples, 0)
expanded_centroids = tf.expand_dims(centroids, 1)
distances = tf.reduce_sum( tf.square(
tf.sub(expanded_vectors, expanded_centroids)), 2)
mins = tf.argmin(distances, 0)
# END from http://esciencegroup.com/2016/01/05/an-encounter-with-googles-tensorflow/
nearest_indices = mins
return nearest_indices

def update_centroids(samples, nearest_indices, n_clusters):
# Updates the centroid to be the mean of all samples associated with it.
nearest_indices = tf.to_int32(nearest_indices)
partitions = tf.dynamic_partition(samples, nearest_indices, n_clusters)
new_centroids = tf.concat(0, [tf.expand_dims(tf.reduce_mean(partition, 0), 0) for partition in partitions])
return new_centroids

n_features = 2
n_clusters = 3
n_samples_per_cluster = 500
seed = 700
embiggen_factor = 70


data_centroids, samples = create_samples(n_clusters, n_samples_per_cluster, n_features, embiggen_factor, seed)
initial_centroids = choose_random_centroids(samples, n_clusters)
nearest_indices = assign_to_nearest(samples, initial_centroids)
updated_centroids = update_centroids(samples, nearest_indices, n_clusters)

model = tf.initialize_all_variables()
with tf.Session() as session:
sample_values = session.run(samples)
updated_centroid_value = session.run(updated_centroids)
print(updated_centroid_value)

plot_clusters(sample_values, updated_centroid_value, n_samples_per_cluster)

推荐 0
本文由 亲爱得xin 创作,采用 知识共享署名-相同方式共享 3.0 中国大陆许可协议 进行许可。
转载、引用前需联系作者,并署名作者且注明文章出处。
本站文章版权归原作者及原出处所有 。内容为作者个人观点, 并不代表本站赞同其观点和对其真实性负责。本站是一个个人学习交流的平台,并不用于任何商业目的,如果有任何问题,请及时联系我们,我们将根据著作权人的要求,立即更正或者删除有关内容。本站拥有对此声明的最终解释权。

0 个评论

要回复文章请先登录注册