python 按excel的經緯度提取對應柵格tif文件的數值

柵格文件
批量處理代碼：
# -*- coding:utf-8 -*-
"""
@author: suyue
@file: extract_stations_from_excel.py
@time: 2025/09/09
@desc: 從Excel讀取站點信息并提取所有站點的CTT值
"""
import rasterio
import numpy as np
import pandas as pd
import os
import glob
from tqdm import tqdm


def read_stations_from_excel(excel_path):
    """
    Load the station list from an Excel workbook.

    The sheet must provide the columns '站名' (station name), '經度'
    (longitude) and '緯度' (latitude).

    Parameters:
    excel_path: path of the Excel file to read

    Returns:
    A list of (longitude, latitude, station_name) tuples, one per row of
    the sheet. Returns None when reading fails or a required column is
    missing; in that case the error is printed rather than raised.
    """
    try:
        table = pd.read_excel(excel_path)
        print(f"成功讀取站點文件: {excel_path}")
        print(f"共找到 {len(table)} 個站點")

        # Report the first required column that the sheet lacks.
        missing = next(
            (name for name in ('站名', '經度', '緯度') if name not in table.columns),
            None,
        )
        if missing is not None:
            raise ValueError(f"Excel文件中缺少必要的列: {missing}")

        # One (lon, lat, name) tuple per sheet row.
        stations = [
            (record['經度'], record['緯度'], record['站名'])
            for _, record in table.iterrows()
        ]

        # Echo a short preview so the input can be sanity-checked.
        print("前5個站點信息:")
        for position, station in enumerate(stations[:5], start=1):
            lon, lat, name = station
            print(f"  站點{position}: {name} - 經度: {lon}, 緯度: {lat}")
        station_count = len(stations)
        if station_count > 5:
            print(f"  ... 共 {station_count} 個站點")

        return stations

    except Exception as err:
        print(f"讀取站點文件時出錯: {err}")
        return None


def _read_pixel(src, lon, lat):
    """Return the valid band-1 value of *src* at (lon, lat) as float, else NaN."""
    # Convert the coordinate to a (row, col) index of the raster grid.
    row, col = src.index(lon, lat)

    # A point outside the raster produces an out-of-range index. Reject it
    # explicitly: a negative window offset may otherwise be interpreted
    # relative to the far edge (like a Python slice) and silently sample a
    # pixel from the wrong place.
    if not (0 <= row < src.height and 0 <= col < src.width):
        return np.nan

    # Read only the single pixel instead of the whole band.
    value = src.read(1, window=((row, row + 1), (col, col + 1)))
    if value.size == 0:
        return np.nan

    pixel_value = float(value[0, 0])
    # NaN pixels and the dataset's declared nodata value count as missing.
    if np.isnan(pixel_value) or pixel_value == src.nodata:
        return np.nan
    return pixel_value


def extract_station_value(raster_path, stations):
    """
    Sample one raster file at every station location.

    The time string attached to each record is the file's base name
    (without extension) with every occurrence of 'CER_' removed.
    NOTE(review): the rest of the script refers to CTT while the prefix
    stripped here is 'CER_' - confirm this matches the real file names.

    Parameters:
    raster_path: path of the raster (TIF) file
    stations: list of (lon, lat, station_name) tuples; the coordinates are
        passed unchanged to ``src.index``, so they are assumed to be in the
        raster's own coordinate system - TODO confirm for projected rasters

    Returns:
    A list with one dict per station, keyed by '站名' (station name),
    '時間' (time string) and 'CTT值' (band-1 pixel value as float). The
    value is NaN when the station lies outside the raster, when the pixel
    is NaN or equals the dataset's nodata value, or when sampling that
    station raised an exception (the error is printed).
    Returns None when the raster itself cannot be opened or processed.
    """
    try:
        with rasterio.open(raster_path) as src:
            # Derive the time string from the file name.
            file_name = os.path.splitext(os.path.basename(raster_path))[0]
            time_str = file_name.replace('CER_', '')

            results = []
            for lon, lat, station_name in stations:
                try:
                    pixel_value = _read_pixel(src, lon, lat)
                except Exception as e:
                    # A failure at one station must not abort the others.
                    print(f"  提取站點 {station_name} 時出錯: {e}")
                    pixel_value = np.nan

                results.append({
                    '站名': station_name,
                    '時間': time_str,
                    'CTT值': pixel_value
                })

            return results

    except Exception as e:
        print(f"處理文件 {raster_path} 時出錯: {e}")
        return None


def batch_extract_stations(input_folder, stations_excel_path, output_excel_path):
    """
    Extract the station values from every TIF file in a folder.

    Reads the station list, samples each '*.tif' file in *input_folder*
    (in file-name order), writes the long-format result to
    *output_excel_path* and prints per-station and overall statistics.

    Parameters:
    input_folder: folder that contains the TIF files
    stations_excel_path: path of the Excel file with the station list
    output_excel_path: path of the Excel file to write

    Returns:
    A tuple (df, station_stats):
      df            - DataFrame with columns '站名', '時間', 'CTT值',
                      sorted by station name and then time
      station_stats - per-station DataFrame with the number of valid
                      values, the total number of records and the valid
                      percentage
    Returns (None, None) when the station list cannot be read or is empty,
    or when no record could be extracted.
    """

    # Read the station list; always hand back a 2-tuple so callers that
    # unpack the result do not fail with a TypeError.
    stations = read_stations_from_excel(stations_excel_path)
    if not stations:
        return None, None

    # Collect the TIF files in a deterministic (file-name) order.
    tif_files = sorted(glob.glob(os.path.join(input_folder, "*.tif")))

    print(f"找到 {len(tif_files)} 個TIF文件")

    # Accumulate the records of all files.
    all_results = []
    for tif_file in tqdm(tif_files, desc="處理TIF文件"):
        file_results = extract_station_value(tif_file, stations)

        # Files that could not be processed return None and are skipped.
        if file_results:
            all_results.extend(file_results)

    if not all_results:
        print("沒有成功提取到任何數據")
        return None, None

    df = pd.DataFrame(all_results)

    # Sort by station name and time, then renumber the rows.
    df = df.sort_values(['站名', '時間']).reset_index(drop=True)

    # Save the long-format table.
    df.to_excel(output_excel_path, index=False, engine='openpyxl')
    print(f"成功保存到: {output_excel_path}")
    print(f"共提取了 {len(df)} 條記錄")

    # Per-station statistics. The total uses 'size' (all rows); 'count'
    # would skip NaN and therefore always equal the number of valid values.
    print("\n各站點數據統計:")
    station_stats = df.groupby('站名')['CTT值'].agg([
        ('有效數據量', lambda x: x.notna().sum()),
        ('數據總量', 'size'),
        ('有效率', lambda x: f"{x.notna().sum() / len(x) * 100:.1f}%")
    ]).reset_index()

    print(station_stats.to_string(index=False))

    # Overall statistics; total_records is non-zero because all_results
    # is non-empty at this point.
    total_valid = df['CTT值'].notna().sum()
    total_records = len(df)
    print(f"\n總體統計: {total_valid}/{total_records} 條有效數據 ({total_valid / total_records * 100:.1f}%)")

    return df, station_stats


def create_pivot_table(df, output_pivot_path):
    """
    Build a time-by-station pivot of the CTT values and save it to Excel.

    Parameters:
    df: long-format DataFrame with the columns '時間', '站名' and 'CTT值',
        or None
    output_pivot_path: path of the Excel file to write

    Returns:
    The pivoted DataFrame (one row per time, one column per station, with
    the time as a regular column), or None when *df* is None.
    """
    if df is None:
        return None

    # Rows are times, columns are stations; 'first' takes the first value
    # of each (time, station) cell.
    wide = df.pivot_table(
        values='CTT值',
        index='時間',
        columns='站名',
        aggfunc='first',
    )

    # Turn the time index back into an ordinary column.
    wide = wide.reset_index()

    wide.to_excel(output_pivot_path, index=False, engine='openpyxl')
    print(f"透視表已保存到: {output_pivot_path}")

    return wide


# Usage example
if __name__ == "__main__":
    # File locations
    input_folder = "D:/20240809example/CTT/"  # folder that holds the TIF files
    stations_excel_path = "D:/20240809example/錫林郭勒示范區站點.xlsx"  # station list (Excel)
    output_excel_path = "D:/20240809example/CTT/錫林郭勒示范區站點CTT數據匯總.xlsx"  # long-format output
    output_pivot_path = "D:/20240809example/CTT/錫林郭勒示范區站點CTT數據透視表.xlsx"  # pivot-table output

    # Make sure the output folder exists. An empty dirname (bare file
    # name) means the current directory and needs no creation.
    output_dir = os.path.dirname(output_excel_path)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"創建輸出文件夾: {output_dir}")

    print("開始提取所有站點的CTT值...")

    # Run the batch extraction.
    extraction = batch_extract_stations(
        input_folder=input_folder,
        stations_excel_path=stations_excel_path,
        output_excel_path=output_excel_path
    )
    # Accept a bare None as well as a (df, stats) tuple so that a failed
    # run ends cleanly instead of raising TypeError on unpacking.
    result_df, station_stats = extraction if extraction is not None else (None, None)

    if result_df is not None:
        print("\n提取完成！結果預覽:")
        print(result_df.head(10))

        # Build the pivot table.
        pivot_df = create_pivot_table(result_df, output_pivot_path)

        if pivot_df is not None:
            print("\n透視表預覽:")
            print(pivot_df.head())

        # Save the statistics next to the main output. The name is built
        # from the path stem so it can never collide with the main output
        # file, whatever extension that file has.
        if station_stats is not None:
            stats_root, _ = os.path.splitext(output_excel_path)
            stats_path = f"{stats_root}_統計信息.xlsx"
            station_stats.to_excel(stats_path, index=False, engine='openpyxl')
            print(f"統計信息已保存到: {stats_path}")

    print("\n程序執行完畢！")
結果：
posted @ 2025-09-15 17:46 秋刀魚CCC Views(14) Comments(0) 收藏舉報
刷新頁面返回頂部
秋刀魚CCC

Never be ashamed of trying

python 按excel的經緯度提取對應柵格tif文件的數值

公告