容器中 PyTorch 的 CPU 速度很慢,原因找到了
- 容器中 PyTorch 的 CPU 速度很慢,原因找到了
# import numpy as np
# import time
# import torch
# import os
# os.environ['OMP_NUM_THREADS'] = '8' # 增加線程數
# torch.set_num_threads(8) # 設置PyTorch線程數
# # 創建測試矩陣
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# print(f"使用設備: {device}")
# B = torch.randn(2000, 2000).to(device) # 使用GPU
# # 預熱GPU
# if device == 'cuda':
#     _ = torch.svd(torch.randn(100,100).to(device))
# # 計時SVD
# start = time.time()
# with torch.no_grad(): # 禁用梯度計算
#     x = torch.svd(B)
# end = time.time()
# print(f"SVD耗時: {end - start:.2f}秒")
import time
import numpy as np
import torch
import os
def diagnose_torch_svd(size: int = 1000) -> None:
    """Compare CPU SVD speed between NumPy and PyTorch and print a report.

    Prints PyTorch's current thread settings and the ``OMP_NUM_THREADS`` /
    ``MKL_NUM_THREADS`` environment variables, times one reduced SVD of the
    same random ``size x size`` float64 matrix with each library, and prints
    tuning suggestions when PyTorch takes more than twice as long as NumPy.

    Args:
        size: Side length of the square test matrix. Defaults to 1000.

    Returns:
        None. The whole report is written to stdout.
    """
    print("=== PyTorch SVD性能診斷 ===")

    # 1. Report the threading configuration currently in effect.
    print("\n1. 系統配置:")
    print(f"PyTorch線程數: {torch.get_num_threads()}")
    print(f"PyTorch interop線程數: {torch.get_num_interop_threads()}")
    print(f"OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', '未設置')}")
    print(f"MKL_NUM_THREADS: {os.environ.get('MKL_NUM_THREADS', '未設置')}")

    # 2. Build the test data. Both libraries decompose the same values; the
    #    copy gives the tensor its own buffer instead of aliasing the array.
    print("\n2. 性能測試:")
    numpy_array = np.random.randn(size, size).astype(np.float64)
    torch_tensor = torch.from_numpy(numpy_array.copy())

    # 3. NumPy baseline. perf_counter() is used instead of time() because it
    #    is monotonic and has the highest available resolution.
    start = time.perf_counter()
    np.linalg.svd(numpy_array, full_matrices=False)
    numpy_time = time.perf_counter() - start

    # 4. PyTorch measurement. torch.linalg.svd replaces the deprecated
    #    torch.svd; full_matrices=False requests the same reduced
    #    decomposition as the NumPy call above.
    start = time.perf_counter()
    torch.linalg.svd(torch_tensor, full_matrices=False)
    torch_time = time.perf_counter() - start

    # Guard against a zero baseline so the report never raises
    # ZeroDivisionError on very small inputs or coarse clocks.
    ratio = torch_time / numpy_time if numpy_time > 0 else float("inf")

    print(f"NumPy SVD: {numpy_time:.4f}s")
    print(f"PyTorch SVD: {torch_time:.4f}s")
    print(f"速度比: {ratio:.2f}x")

    # 5. Suggestions: flag PyTorch only when it is more than 2x slower.
    print("\n3. 優化建議:")
    if torch_time > 2 * numpy_time:
        print("?? PyTorch SVD明顯慢于NumPy,建議:")
        print(" - 設置 torch.set_num_threads(4)")
        print(" - 檢查環境變量 OMP_NUM_THREADS 和 MKL_NUM_THREADS")
        print(" - 考慮對大型矩陣使用NumPy后端")
    else:
        print("? PyTorch SVD性能正常")
# Baseline run with the default thread settings (disabled):
# diagnose_torch_svd()

# Re-run the diagnosis after applying the tuning below.
separator = "=" * 50
print("\n" + separator)
print("應用優化后:")

# Optional environment-variable tuning (disabled):
# os.environ['OMP_NUM_THREADS'] = '4'
# os.environ['MKL_NUM_THREADS'] = '4'

# Set PyTorch's CPU thread count to 8 before measuring.
torch.set_num_threads(8)
diagnose_torch_svd()
--- 她說, 她是仙,她不是神

浙公網安備 33010602011771號