
2023-09-04 01:52:31 作者:冷風襲懷~デ


|   id|           x|         y|         z|
| row0|  -6.0776997|-2.9096103|-1.5181729|
| row1|  -1.0122601|  7.322841|-5.4424076|
| row2|   -8.297007| 6.3228936| 1.1672047|
| row3|  -3.5071216|  4.784812|-5.4449472|
| row4|   -5.122823|-3.3220499|-0.5069805|
| row5|  -2.4764006|  8.255791|  4.409478|
| row6|   7.3153954| -5.079449| -7.291215|
| row7|  -2.0167463|  9.303454|  7.095179|
| row8|  -0.2338185| -4.892681| 2.1228876|
| row9|    6.565442| -6.855994|-6.7983212|
|row10|  -5.6902847|-6.4827404|-0.9246967|
|row11|-0.017986143| 2.7632365| -8.814824|
|row12|  -6.9042625|-6.1491723|-3.5354295|
|row13|  -10.389865|  9.537853|  0.674591|
|row14|   3.9688683|-6.0467844| -5.462389|
|row15|   -7.337052|-3.7689247| -5.261122|
|row16|   -8.991589|  8.738728|  3.864116|
|row17| -0.18098584|  5.482743| -4.900118|
|row18|   3.3193955|-6.3573766| -6.978025|
|row19|  -2.0266335|-3.4171724|0.48218703|


print("==========================Short report==================================== ")

n_clusters = model.summary.k
print("Number of predicted clusters: " + str(n_clusters))

cluster_Sizes = model.summary.clusterSizes

col = ['size']
df2 = pd.DataFrame(cluster_Sizes, columns=col).sort_values(by=['size'], ascending=True)  #sorting
cluster_Sizes = df2["size"].unique()
print("Size of predicted clusters: " + str(cluster_Sizes))

#==========================Short report==================================== 
#Number of predicted clusters: 10
#Size of predicted clusters: [ 486  496  504  529  985  998  999 1003 2000]

|     |prediction|
|    2|       486|
|    6|       496|
|    0|       504|
|    8|       529|
|    5|       985|
|    9|       998|
|    7|       999|
|    3|      1003|
|    1|      2000|
|    4|      2000|
教你们如何使用 Python 进行数据可视化 Plotly


|   id|prediction|           x|         y|         z|
| row0|         9|  -6.0776997|-2.9096103|-1.5181729|
| row1|         4|  -1.0122601|  7.322841|-5.4424076|
| row2|         1|   -8.297007| 6.3228936| 1.1672047|
| row3|         4|  -3.5071216|  4.784812|-5.4449472|
| row4|         3|   -5.122823|-3.3220499|-0.5069805|
| row5|         1|  -2.4764006|  8.255791|  4.409478|
| row6|         5|   7.3153954| -5.079449| -7.291215|
| row7|         1|  -2.0167463|  9.303454|  7.095179|
| row8|         7|  -0.2338185| -4.892681| 2.1228876|
| row9|         5|    6.565442| -6.855994|-6.7983212|
|row10|         3|  -5.6902847|-6.4827404|-0.9246967|
|row11|         4|-0.017986143| 2.7632365| -8.814824|
|row12|         9|  -6.9042625|-6.1491723|-3.5354295|
|row13|         1|  -10.389865|  9.537853|  0.674591|
|row14|         2|   3.9688683|-6.0467844| -5.462389|
|row15|         9|   -7.337052|-3.7689247| -5.261122|
|row16|         1|   -8.991589|  8.738728|  3.864116|
|row17|         4| -0.18098584|  5.482743| -4.900118|
|row18|         2|   3.3193955|-6.3573766| -6.978025|
|row19|         7|  -2.0266335|-3.4171724|0.48218703|


color_names = ["red", "blue", "yellow", "black", "pink", "purple", "orange"]

def plot_3d_transformed_data(df, title, colors="red"):
  # Imports.
  import matplotlib as mpl
  import matplotlib.pyplot as plt
  from mpl_toolkits.mplot3d import Axes3D
  import pandas as pd
  import numpy as np
  import plotly.express as px
  import matplotlib.cm as cm

  # Figure.
  figure = plt.figure(figsize=(12, 10))
  ax = figure.add_subplot(projection="3d")
  ax.set_xlabel("PC1: x")
  ax.set_ylabel("PC2: y")
  ax.set_zlabel("PC3: z")
  ax.set_title("scatter 3D legend") 

  # Data and 3D scatter.
  #colors = ["red", "blue", "yellow", "black", "pink", "purple", "orange", "black", "red" ,"blue"]
  colors = cm.rainbow(np.linspace(0, 1, len(cluster_Sizes)))

  # Create your plot
  #px.scatter(df1, x='x', y='y', size=df2['size'], color='jet')
  sc = ax.scatter(df1.x, df1.y, df1.z, alpha=0.6, c=colors, sizes=df2['size'], marker="o")

  # Legend 1.
  handles, labels = sc.legend_elements(prop="sizes", alpha=0.6)
  legend1 = ax.legend(handles, labels, bbox_to_anchor=(1, 1), loc="upper right", title="Sizes")
  ax.add_artist(legend1) # <- this is important.

  # Legend 2.
  unique_colors = set(colors)
  handles = []
  labels = []
  for n, color in enumerate(unique_colors, start=1):
      artist = mpl.lines.Line2D([], [], color=color, lw=0, marker="o")
  legend2 = ax.legend(handles, labels, bbox_to_anchor=(0.05, 0.05), loc="lower left", title="Classes")


问题是创建合适的色彩映射表支持簇号(避免ValueError: 'c' argument has 9 elements, which is inconsistent with 'x' and 'y' with size 10000.),并找到两个数据帧之间大小不匹配的解决方案(避免ValueError: s must be a scalar, or the same size as x and y)以用于:

sc = ax.scatter(df1.x,
                c=colors,   #colors=cm.rainbow(np.linspace(0, 1, len(cluster_Sizes)))
所以有一个想法是,我将df2['size']赋给df1,但这很昂贵,而且不是一个好主意。所以我想知道是否有一种很好的方法来更新def plot_3d_transformed_data(),并使用它来更好地可视化可以通过一个曲线图来指示预测的星团标签和星团大小。为了快速调试,我提供了一个colab notebook。




color_names = ["red", "blue", "yellow", "black", "pink", "purple", "orange"]

def plot_3d_transformed_data(df, title, colors="red"):

  # Imports.
  import matplotlib as mpl
  import matplotlib.pyplot as plt
  from mpl_toolkits.mplot3d import Axes3D
  import pandas as pd
  import numpy as np
  import plotly.express as px
  import matplotlib.cm as cm

  #clusterSizes = pd.read_csv(io.StringIO(data1), delim_whitespace=True)

  #pddf_pred = df_pred.set_index('id')
  cluster_Sizes = clusterSizes["size"].unique()
  #x_train = np.random.randint(20,500,(20,))

  # Figure.
  figure = plt.figure(figsize=(12, 10))
  ax = figure.add_subplot(projection="3d")
  ax.set_xlabel("PC1: x")
  ax.set_ylabel("PC2: y")
  ax.set_zlabel("PC3: z")
  ax.set_title("scatter 3D legend") 

  colors2 = ["red", "blue", "yellow", "black", "pink", "purple", "orange", "black", "red" ,"blue"]
  colors = cm.rainbow(np.linspace(0, 1, 20))

  # Create 3D scatter plot
  sc = ax.scatter(pddf_pred.x.values, pddf_pred.y.values, pddf_pred.z.values, alpha=0.6, s=x_train, c=colors, marker="o")

  # Legend 1.
  handles, labels = sc.legend_elements(prop="sizes", alpha=0.6)
  legend1 = ax.legend(handles, labels, bbox_to_anchor=(1.2, 1), loc="upper right", title="Sizes")

  # Legend 2.
  unique_colors = set(colors2)
  handles = []
  labels = []
  for n, color in enumerate(unique_colors, start=1):
      artist = mpl.lines.Line2D([], [], color=color, lw=0, marker="o")
  legend2 = ax.legend(handles, labels, bbox_to_anchor=(-0.05, 0.05), loc="lower left", title="Classes")
