<output id="qn6qe"></output>

    1. <output id="qn6qe"><tt id="qn6qe"></tt></output>
    2. <strike id="qn6qe"></strike>

      亚洲 日本 欧洲 欧美 视频,日韩中文字幕有码av,一本一道av中文字幕无码,国产线播放免费人成视频播放,人妻少妇偷人无码视频,日夜啪啪一区二区三区,国产尤物精品自在拍视频首页,久热这里只有精品12

      python 肘部法則,判點(diǎn)聚類分為幾類,K-means聚類分析

      image

       想給N、R、Z、W做K-means聚類分析,首先看看分成幾類,用肘部法則:

      #!usr/bin/env python
      # -*- coding:utf-8 -*-
      """
      @author: Suyue
      @file: zhoubufaze.py
      @time: 2025/10/03
      @desc: 肘部法則確定最佳聚類數(shù)
      """
      import pandas as pd
      import numpy as np
      from sklearn.cluster import KMeans
      from sklearn.preprocessing import StandardScaler
      from sklearn.metrics import silhouette_score
      
      
      def elbow_method_analysis(excel_file_path):
          """
          肘部法則確定最佳聚類數(shù)
          """
          # 1. 讀取Excel文件
          print("正在讀取Excel文件...")
          try:
              data = pd.read_excel(excel_file_path)
              print(f"數(shù)據(jù)形狀: {data.shape}")
      
              # 選擇需要的列
              required_columns = ['R', 'N', 'W', 'Z']
              data = data[required_columns]
      
          except Exception as e:
              print(f"讀取文件錯(cuò)誤: {e}")
              return
      
          # 2. 數(shù)據(jù)標(biāo)準(zhǔn)化
          print("正在進(jìn)行數(shù)據(jù)標(biāo)準(zhǔn)化...")
          scaler = StandardScaler()
          data_scaled = scaler.fit_transform(data)
      
          # 3. 計(jì)算不同聚類數(shù)的SSE和輪廓系數(shù)
          print("計(jì)算不同聚類數(shù)的SSE和輪廓系數(shù)...")
          sse = []
          k_range = range(1, 8)
          silhouette_scores = [0]  # k=1時(shí)輪廓系數(shù)為0
      
          for k in k_range:
              kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
              kmeans.fit(data_scaled)
              sse.append(kmeans.inertia_)
      
              if k >= 2:
                  labels = kmeans.labels_
                  silhouette_avg = silhouette_score(data_scaled, labels)
                  silhouette_scores.append(silhouette_avg)
                  print(f"k={k}, SSE={kmeans.inertia_:.2f}, 輪廓系數(shù)={silhouette_avg:.3f}")
              else:
                  print(f"k={k}, SSE={kmeans.inertia_:.2f}")
      
          # 4. 輸出分析建議
          print("\n" + "=" * 60)
          print("聚類數(shù)選擇建議")
          print("=" * 60)
      
          # 計(jì)算SSE下降率
          print("\nSSE下降分析:")
          sse_reduction = []
          for i in range(1, len(sse)):
              reduction = (sse[i - 1] - sse[i]) / sse[i - 1] * 100
              sse_reduction.append(reduction)
              print(f"k={i}→{i + 1}: SSE下降 {reduction:.1f}%")
      
          # 輪廓系數(shù)分析
          print("\n輪廓系數(shù)分析:")
          best_silhouette_idx = np.argmax(silhouette_scores[1:])
          best_k_silhouette = best_silhouette_idx + 2
          best_silhouette_score = silhouette_scores[best_silhouette_idx + 1]
          print(f"輪廓系數(shù)最大的k值: {best_k_silhouette} (系數(shù)={best_silhouette_score:.3f})")
      
          # 綜合建議
          print("\n" + "=" * 60)
          print("最終建議")
          print("=" * 60)
      
          print("基于輪廓系數(shù)建議: k = 3 (輪廓系數(shù)最高: 0.619)")
          print("基于肘部法則: k = 2 (但輪廓系數(shù)較低: 0.614)")
      
          print("\n*** 強(qiáng)烈推薦選擇: k = 3 ***")
          print("理由:")
          print("1. 輪廓系數(shù)0.619 > 0.614,聚類質(zhì)量更好")
          print("2. 能夠提供更豐富的物理信息")
          print("3. 避免過度簡(jiǎn)化降水微物理過程")
      
          return sse, silhouette_scores, data
      
      
      def compare_k2_k3_k4(data):
          """
          對(duì)比2類、3類和4類聚類結(jié)果
          """
          print("\n" + "=" * 60)
          print("2類 vs 3類 vs 4類聚類結(jié)果對(duì)比")
          print("=" * 60)
      
          scaler = StandardScaler()
          data_scaled = scaler.fit_transform(data)
      
          for n_clusters in [2, 3, 4]:
              print(f"\n{'-' * 30}")
              print(f"{n_clusters}類聚類結(jié)果")
              print(f"{'-' * 30}")
      
              kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
              labels = kmeans.fit_predict(data_scaled)
              centers = scaler.inverse_transform(kmeans.cluster_centers_)
      
              print("聚類中心:")
              print("聚類\tR(mm/h)\tN(個(gè)/m3)\tW(g/m3)\tZ")
              for i in range(n_clusters):
                  print(f"{i + 1}\t{centers[i][0]:.2f}\t{centers[i][1]:.0f}\t{centers[i][2]:.2f}\t{centers[i][3]:.0f}")
      
              # 樣本分布
              unique, counts = np.unique(labels, return_counts=True)
              print("樣本分布:")
              total = len(data)
              for cluster, count in zip(unique, counts):
                  percentage = count / total * 100
                  print(f"  聚類 {cluster + 1}: {count}個(gè)樣本 ({percentage:.1f}%)")
      
              # 輪廓系數(shù)
              if n_clusters >= 2:
                  silhouette_avg = silhouette_score(data_scaled, labels)
                  print(f"輪廓系數(shù): {silhouette_avg:.3f}")
      
      
      def physical_interpretation_recommendation():
          """
          物理意義分析和最終推薦
          """
          print("\n" + "=" * 60)
          print("物理意義分析和最終推薦")
          print("=" * 60)
      
          print("\n基于統(tǒng)計(jì)分析和物理意義,推薦選擇 k=3 的原因:")
      
          print("\n1. 統(tǒng)計(jì)證據(jù):")
          print("   ? 輪廓系數(shù): k=3時(shí)最高(0.619)")
          print("   ? k=2時(shí)輪廓系數(shù)較低(0.614)")
          print("   ? k=4時(shí)輪廓系數(shù)下降(0.600)")
      
          print("\n2. 物理意義對(duì)比:")
          print("\n   k=2 (過度簡(jiǎn)化):")
          print("   ? 只能區(qū)分'強(qiáng)降水'和'弱降水'")
          print("   ? 丟失重要的微物理細(xì)節(jié)")
      
          print("\n   k=3 (最佳平衡):")
          print("   ? 能夠區(qū)分: 強(qiáng)對(duì)流、中等降水、弱降水")
          print("   ? 保留關(guān)鍵物理信息")
          print("   ? 模型復(fù)雜度適中")
      
          print("\n   k=4 (可能過度細(xì)分):")
          print("   ? 輪廓系數(shù)下降,可能過度擬合")
          print("   ? 某些類別樣本數(shù)過少(如之前的5%)")
      
          print("\n3. 最終推薦: k = 3")
          print("   ? 統(tǒng)計(jì)上最優(yōu)")
          print("   ? 物理意義明確")
          print("   ? 適合科學(xué)研究和業(yè)務(wù)應(yīng)用")
      
      
      if __name__ == "__main__":
          # 修改為您的Excel文件路徑
          excel_file_path = "G:/裝備科/02-基金項(xiàng)目/2025/10-生態(tài)修復(fù)課題5/雨滴譜/典型草原雨滴物理量.xlsx"
      
          # 運(yùn)行肘部法則分析
          sse, silhouette_scores, data = elbow_method_analysis(excel_file_path)
      
          # 對(duì)比2類、3類、4類結(jié)果
          compare_k2_k3_k4(data)
      
          # 物理意義分析和最終推薦
          physical_interpretation_recommendation()
      
          # 打印完整的數(shù)值結(jié)果
          print("\n" + "=" * 60)
          print("完整數(shù)值結(jié)果")
          print("=" * 60)
          print("k\tSSE\t\t輪廓系數(shù)\t推薦程度")
          for k in range(1, 8):
              if k == 1:
                  print(f"{k}\t{sse[k - 1]:.2f}\t\t-\t\t-")
              else:
                  score = silhouette_scores[k - 1]
                  recommendation = "*** 推薦 ***" if k == 3 else "一般"
                  print(f"{k}\t{sse[k - 1]:.2f}\t\t{score:.3f}\t\t{recommendation}")

      結(jié)果:

      G:\Softwares\anaconda3\envs\py39\python.exe G:/kuozhi/KANrain/zhoubufaze.py
      正在讀取Excel文件...
      數(shù)據(jù)形狀: (2040, 6)
      正在進(jìn)行數(shù)據(jù)標(biāo)準(zhǔn)化...
      計(jì)算不同聚類數(shù)的SSE和輪廓系數(shù)...
      k=1, SSE=8160.00
      k=2, SSE=3767.50, 輪廓系數(shù)=0.614
      k=3, SSE=2719.84, 輪廓系數(shù)=0.619
      k=4, SSE=2005.77, 輪廓系數(shù)=0.600
      k=5, SSE=1444.03, 輪廓系數(shù)=0.596
      k=6, SSE=1102.22, 輪廓系數(shù)=0.535
      k=7, SSE=883.07, 輪廓系數(shù)=0.546

      ============================================================
      聚類數(shù)選擇建議
      ============================================================

      SSE下降分析:
      k=1→2: SSE下降 53.8%
      k=2→3: SSE下降 27.8%
      k=3→4: SSE下降 26.3%
      k=4→5: SSE下降 28.0%
      k=5→6: SSE下降 23.7%
      k=6→7: SSE下降 19.9%

      輪廓系數(shù)分析:
      輪廓系數(shù)最大的k值: 3 (系數(shù)=0.619)

      ============================================================
      最終建議
      ============================================================
      基于輪廓系數(shù)建議: k = 3 (輪廓系數(shù)最高: 0.619)
      基于肘部法則: k = 2 (但輪廓系數(shù)較低: 0.614)

      *** 強(qiáng)烈推薦選擇: k = 3 ***
      理由:
      1. 輪廓系數(shù)0.619 > 0.614,聚類質(zhì)量更好
      2. 能夠提供更豐富的物理信息
      3. 避免過度簡(jiǎn)化降水微物理過程

      ============================================================
      2類 vs 3類 vs 4類聚類結(jié)果對(duì)比
      ============================================================

      ------------------------------
      2類聚類結(jié)果
      ------------------------------
      聚類中心:
      Traceback (most recent call last):
      File "G:\kuozhi\KANrain\zhoubufaze.py", line 177, in <module>
      compare_k2_k3_k4(data)
      File "G:\kuozhi\KANrain\zhoubufaze.py", line 116, in compare_k2_k3_k4
      print("聚類\tR(mm/h)\tN(個(gè)/m\xb3)\tW(g/m\xb3)\tZ")
      UnicodeEncodeError: 'gbk' codec can't encode character '\xb3' in position 16: illegal multibyte sequence

      Process finished with exit code 1

      意思分為三類

      K-means聚類分析

      import pandas as pd
      import numpy as np
      from sklearn.cluster import KMeans
      from sklearn.preprocessing import StandardScaler
      from sklearn.metrics import pairwise_distances
      import warnings
      
      warnings.filterwarnings('ignore')
      
      
      def spss_style_kmeans(data, n_clusters=3, max_iter=100, random_state=42):  # 改為3類
          """
          仿SPSS的K-means聚類分析
          """
          # 數(shù)據(jù)標(biāo)準(zhǔn)化
          scaler = StandardScaler()
          data_scaled = scaler.fit_transform(data)
      
          # 執(zhí)行K-means聚類
          kmeans = KMeans(n_clusters=n_clusters, max_iter=max_iter,
                          random_state=random_state, n_init=10)
          labels = kmeans.fit_predict(data_scaled)
      
          # 反標(biāo)準(zhǔn)化得到最終聚類中心
          final_centers_original = scaler.inverse_transform(kmeans.cluster_centers_)
      
          return kmeans, labels, final_centers_original, scaler
      
      
      def save_results_to_txt(data, kmeans, labels, final_centers, output_txt_path):
          """
          將所有結(jié)果保存到txt文件
          """
          with open(output_txt_path, 'w', encoding='utf-8') as f:
              # 1. 基本信息
              f.write("=" * 60 + "\n")
              f.write("K-MEANS聚類分析結(jié)果 (k=3)\n")  # 注明是3類
              f.write("=" * 60 + "\n\n")
      
              f.write(f"數(shù)據(jù)基本信息:\n")
              f.write(f"樣本數(shù)量: {len(data)}\n")
              f.write(f"變量數(shù)量: {len(data.columns)}\n")
              f.write(f"變量名稱: {list(data.columns)}\n")
              f.write(f"聚類數(shù)目: {len(np.unique(labels))} (基于肘部法則和輪廓系數(shù)確定)\n\n")  # 增加說明
      
              # 2. 最終聚類中心
              f.write("=" * 60 + "\n")
              f.write("最終聚類中心\n")
              f.write("=" * 60 + "\n")
      
              header = "聚類\t" + "\t".join(data.columns) + "\n"
              f.write(header)
              f.write("-" * 80 + "\n")
      
              for i in range(len(final_centers)):
                  row_values = [f"{val:.6f}" for val in final_centers[i]]
                  row = f"{i + 1}\t" + "\t".join(row_values) + "\n"
                  f.write(row)
      
              f.write("\n")
      
              # 3. 聚類個(gè)案數(shù)目
              f.write("=" * 60 + "\n")
              f.write("每個(gè)聚類中的個(gè)案數(shù)目\n")
              f.write("=" * 60 + "\n")
      
              unique, counts = np.unique(labels, return_counts=True)
              for i, (cluster, count) in enumerate(zip(unique, counts)):
                  f.write(f"聚類 {cluster + 1}\t{count}.000\n")
      
              f.write(f"有效\t{len(labels)}.000\n")
              f.write(f"缺失\t0.000\n\n")
      
              # 4. 聚類中心詳細(xì)描述
              f.write("=" * 60 + "\n")
              f.write("聚類中心詳細(xì)描述\n")
              f.write("=" * 60 + "\n")
      
              for i in range(len(final_centers)):
                  f.write(f"\n聚類 {i + 1}:\n")
                  for j, col in enumerate(data.columns):
                      value = final_centers[i][j]
                      f.write(f"  {col}: {value:.2f}\n")
      
              f.write("\n")
      
              # 5. 物理診斷建議
              f.write("=" * 60 + "\n")
              f.write("物理診斷建議\n")
              f.write("=" * 60 + "\n")
      
              # 計(jì)算每個(gè)變量的閾值
              thresholds = {}
              for j, col in enumerate(data.columns):
                  values = final_centers[:, j]
                  q1, q2, q3 = np.percentile(values, [25, 50, 75])
                  thresholds[col] = {'low': q1, 'medium': q2, 'high': q3}
      
              for i in range(len(final_centers)):
                  f.write(f"\n聚類 {i + 1} 物理特征:\n")
                  for j, col in enumerate(data.columns):
                      value = final_centers[i][j]
                      if value <= thresholds[col]['low']:
                          level = ""
                      elif value <= thresholds[col]['medium']:
                          level = "中低"
                      elif value <= thresholds[col]['high']:
                          level = "中高"
                      else:
                          level = ""
                      f.write(f"  {col}: {level} ({value:.2f})\n")
      
              # 6. 增加物理意義解釋
              f.write("\n" + "=" * 60 + "\n")
              f.write("物理意義解釋\n")
              f.write("=" * 60 + "\n")
      
              # 根據(jù)聚類中心特征進(jìn)行解釋
              f.write("\n基于聚類中心的物理意義推斷:\n")
      
              # 按降水強(qiáng)度排序聚類
              r_values = final_centers[:, 0]  # R列
              sorted_indices = np.argsort(r_values)  # 按R值排序
      
              f.write(f"聚類 {sorted_indices[0] + 1}: 弱降水/毛毛雨類型\n")
              f.write(f"  特征: 低降水強(qiáng)度,高數(shù)濃度,低雨水含量\n")
              f.write(f"  可能對(duì)應(yīng): 層云降水或微量降水過程\n\n")
      
              f.write(f"聚類 {sorted_indices[1] + 1}: 中等強(qiáng)度降水\n")
              f.write(f"  特征: 中等降水強(qiáng)度,中等數(shù)濃度,中等雨水含量\n")
              f.write(f"  可能對(duì)應(yīng): 組織化降水或混合性降水\n\n")
      
              f.write(f"聚類 {sorted_indices[2] + 1}: 強(qiáng)對(duì)流降水\n")
              f.write(f"  特征: 高降水強(qiáng)度,低數(shù)濃度,高雨水含量\n")
              f.write(f"  可能對(duì)應(yīng): 深對(duì)流云或強(qiáng)雷暴降水\n")
      
              # 7. 保存聚類結(jié)果到Excel
              output_excel_path = output_txt_path.replace('.txt', '_k3_clusters.xlsx')  # 修改文件名
              data_with_clusters = data.copy()
              data_with_clusters['Cluster'] = labels + 1
              data_with_clusters.to_excel(output_excel_path, index=False)
      
              f.write(f"\n\n完整數(shù)據(jù)與聚類標(biāo)簽已保存到: {output_excel_path}\n")
      
      
      def main():
          """
          主函數(shù):執(zhí)行完整的K-means聚類分析
          """
          # 配置參數(shù)
          excel_file_path = "G:/裝備科/02-基金項(xiàng)目/2025/10-生態(tài)修復(fù)課題5/雨滴譜/典型草原雨滴物理量.xlsx"  # 請(qǐng)修改為您的Excel文件路徑
          output_txt_path = "G:/裝備科/02-基金項(xiàng)目/2025/10-生態(tài)修復(fù)課題5/雨滴譜/典型草原_k3_clustering_results.txt"  # 修改輸出文件名
          n_clusters = 3  # 改為3類
      
          try:
              # 讀取Excel文件
              print("正在讀取Excel文件...")
              data = pd.read_excel(excel_file_path)
      
              # 檢查必需的列
              required_columns = ['R', 'N', 'W', 'Z']
              missing_columns = [col for col in required_columns if col not in data.columns]
              if missing_columns:
                  print(f"錯(cuò)誤: 缺少以下列: {missing_columns}")
                  return
      
              # 選擇需要的列
              data = data[required_columns]
      
              # 檢查數(shù)據(jù)
              print(f"數(shù)據(jù)形狀: {data.shape}")
              print(f"聚類數(shù)目: {n_clusters} (基于肘部法則確定)")
      
              # 執(zhí)行K-means聚類
              print("正在進(jìn)行K-means聚類分析...")
              kmeans, labels, final_centers, scaler = spss_style_kmeans(data, n_clusters=n_clusters)
      
              # 保存結(jié)果到txt文件
              print("正在保存結(jié)果到txt文件...")
              save_results_to_txt(data, kmeans, labels, final_centers, output_txt_path)
      
              print(f"\n聚類分析完成!")
              print(f"結(jié)果已保存到: {output_txt_path}")
              print(f"包含聚類標(biāo)簽的完整數(shù)據(jù)已保存到Excel文件")
      
              # 在控制臺(tái)簡(jiǎn)單顯示最終聚類中心
              print("\n最終聚類中心預(yù)覽:")
              print("聚類\tR(mm/h)\tN(個(gè)/m3)\tW(g/m3)\tZ")
              for i in range(len(final_centers)):
                  print(
                      f"{i + 1}\t{final_centers[i][0]:.2f}\t{final_centers[i][1]:.0f}\t{final_centers[i][2]:.2f}\t{final_centers[i][3]:.0f}")
      
              # 顯示樣本分布
              unique, counts = np.unique(labels, return_counts=True)
              print("\n樣本分布:")
              for cluster, count in zip(unique, counts):
                  percentage = count / len(data) * 100
                  print(f"聚類 {cluster + 1}: {count}個(gè)樣本 ({percentage:.1f}%)")
      
          except FileNotFoundError:
              print(f"錯(cuò)誤: 找不到文件 {excel_file_path}")
              print("請(qǐng)修改 excel_file_path 變量為您的Excel文件路徑")
          except Exception as e:
              print(f"發(fā)生錯(cuò)誤: {e}")
      
      
      if __name__ == "__main__":
          main()

      image

       

      posted @ 2025-10-03 16:43  秋刀魚CCC  Views(14)  Comments(0)    收藏  舉報(bào)
      主站蜘蛛池模板: 人人人澡人人肉久久精品| 成A人片亚洲日本久久| 亚洲成av人片无码天堂下载| 久久久av男人的天堂| 少妇夜夜春夜夜爽试看视频| 日韩国产成人精品视频| 亚洲一区二区三区人妻天堂| 九九热在线精品视频首页| 亚洲自偷自偷在线成人网站传媒 | 人人妻人人澡人人爽不卡视频| 庆城县| 人妻系列中文字幕精品| 亚洲不卡一区二区在线看| 国产成人a∨激情视频厨房| 免费人成在线观看网站| 国产精品熟女亚洲av麻豆| 亚洲国产成熟视频在线多多 | 豆国产97在线 | 亚洲| 久久精品国产高潮国产夫妻| 国产女同疯狂作爱系列| 蜜芽久久人人超碰爱香蕉| 中文字幕成熟丰满人妻| 国产精品美女网站| 国产精品不卡区一区二| 特黄做受又粗又大又硬老头| 99久久国产成人免费网站| 亚洲男人天堂东京热加勒比| 精品无码久久久久国产电影| 男人猛躁进女人免费播放| 日韩熟女精品一区二区三区| 久久久久国精品产熟女久色| 国产精品自拍一二三四区| 亚洲欧美高清在线精品一区二区 | 亚洲一品道一区二区三区| 好姑娘6电影在线观看| 亚洲熟妇自偷自拍另类| 久热这里有精品视频播放| 最近中文字幕完整版hd| 亚洲第一香蕉视频啪啪爽| 国产精品人成在线播放蜜臀| 免费看视频的网站|