Pytorch實現二分類問題

In [1]:

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

In [2]:

data = pd.read_csv('./HR.csv')
data.head(10)

	satisfaction_level	last_evaluation	number_project	average_montly_hours	time_spend_company	left	part	salary
0	0.38	0.53	2	157	3	1	sales	low
1	0.80	0.86	5	262	6	1	sales	medium
2	0.11	0.88	7	272	4	1	sales	medium
3	0.72	0.87	5	223	5	1	sales	low
4	0.37	0.52	2	159	3	1	sales	low
5	0.41	0.50	2	153	3	1	sales	low
6	0.10	0.77	6	247	4	1	sales	low
7	0.92	0.85	5	259	5	1	sales	low
8	0.89	1.00	5	224	5	1	sales	low
9	0.42	0.53	2	142	3	1	sales	low

In [3]:

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14999 entries, 0 to 14998
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   satisfaction_level     14999 non-null  float64
 1   last_evaluation        14999 non-null  float64
 2   number_project         14999 non-null  int64  
 3   average_montly_hours   14999 non-null  int64  
 4   time_spend_company     14999 non-null  int64  
 5   Work_accident          14999 non-null  int64  
 6   left                   14999 non-null  int64  
 7   promotion_last_5years  14999 non-null  int64  
 8   part                   14999 non-null  object 
 9   salary                 14999 non-null  object 
dtypes: float64(2), int64(6), object(2)
memory usage: 1.1+ MB

In [4]:

data.part.unique()

Out[4]:

array(['sales', 'accounting', 'hr', 'technical', 'support', 'management',
       'IT', 'product_mng', 'marketing', 'RandD'], dtype=object)

In [5]:

data = data.join(pd.get_dummies(data.part)).join(pd.get_dummies(data.salary))
data.drop(columns=['part', 'salary'], inplace=True)
data

	satisfaction_level	last_evaluation	number_project	average_montly_hours	time_spend_company	Work_accident	left	promotion_last_5years	IT	RandD	...	hr	management	marketing	product_mng	sales	support	technical	high	low	medium
0	0.38	0.53	2	157	3	0	1	0	False	False	...	False	False	False	False	True	False	False	False	True	False
1	0.80	0.86	5	262	6	0	1	0	False	False	...	False	False	False	False	True	False	False	False	False	True
2	0.11	0.88	7	272	4	0	1	0	False	False	...	False	False	False	False	True	False	False	False	False	True
3	0.72	0.87	5	223	5	0	1	0	False	False	...	False	False	False	False	True	False	False	False	True	False
4	0.37	0.52	2	159	3	0	1	0	False	False	...	False	False	False	False	True	False	False	False	True	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
14994	0.40	0.57	2	151	3	0	1	0	False	False	...	False	False	False	False	False	True	False	False	True	False
14995	0.37	0.48	2	160	3	0	1	0	False	False	...	False	False	False	False	False	True	False	False	True	False
14996	0.37	0.53	2	143	3	0	1	0	False	False	...	False	False	False	False	False	True	False	False	True	False
14997	0.11	0.96	6	280	4	0	1	0	False	False	...	False	False	False	False	False	True	False	False	True	False
14998	0.37	0.52	2	158	3	0	1	0	False	False	...	False	False	False	False	False	True	False	False	True	False

14999 rows × 21 columns

In [6]:

data.left.value_counts()

Out[6]:

left
0    11428
1     3571
Name: count, dtype: int64

In [7]:

Y_data = data.left.values.reshape(-1, 1)
Y = torch.from_numpy(Y_data).type(torch.FloatTensor)

In [8]:

data = data.drop(columns='left')
X_data = data.values.astype(float)
X = torch.from_numpy(X_data).type(torch.FloatTensor)

In [9]:

class HRModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin_1 = nn.Linear(20, 64)
        self.lin_2 = nn.Linear(64, 64)
        self.lin_3 = nn.Linear(64, 1)
        self.activate = nn.SELU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, input):
        x = self.lin_1(input)
        x = self.activate(x)
        x = self.lin_2(x)
        x = self.activate(x)
        x = self.lin_3(x)
        return self.sigmoid(x)

In [10]:

lr = 0.001
model = HRModel()
opt = torch.optim.Adam(model.parameters(), lr=lr)
batch_size = 64
steps = len(data) // batch_size
epochs = 501
loss_fn = nn.BCELoss()

In [11]:

train_x, test_x, train_y, test_y = train_test_split(X_data, Y_data)
train_x = torch.from_numpy(train_x).type(torch.FloatTensor)
test_x = torch.from_numpy(test_x).type(torch.FloatTensor)
train_y = torch.from_numpy(train_y).type(torch.FloatTensor)
test_y = torch.from_numpy(test_y).type(torch.FloatTensor)

train_ds = TensorDataset(train_x, train_y)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_ds = TensorDataset(test_x, test_y)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True)

In [12]:

def accuracy(out, yb):
    return ((out.data.numpy()>0.5)==yb.numpy()).mean()

In [13]:

%%time
for epoch in range(epochs):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        opt.zero_grad()
        loss.backward()
        opt.step()

    if epoch%50 == 0:
        model.eval()
        with torch.no_grad():
            valid_loss = sum([ loss_fn(model(x), y) for x, y in test_dl ])
            acc_mean = np.mean([accuracy(model(x), y) for x, y in test_dl])
        print('訓練次數:', epoch, ' 損失:', valid_loss/len(test_dl), ' 準確率:', acc_mean)

訓練次數: 0  損失: tensor(0.5407)  準確率: 0.7669770294380017
訓練次數: 50  損失: tensor(0.1423)  準確率: 0.9626588983050848
訓練次數: 100  損失: tensor(0.1460)  準確率: 0.9528462310437109
訓練次數: 150  損失: tensor(0.1282)  準確率: 0.9607911462979483
訓練次數: 200  損失: tensor(0.1150)  準確率: 0.9667986173059768
訓練次數: 250  損失: tensor(0.1369)  準確率: 0.9555084745762712
訓練次數: 300  損失: tensor(0.1175)  準確率: 0.96671498661909
訓練次數: 350  損失: tensor(0.1191)  準確率: 0.9638018510258698
訓練次數: 400  損失: tensor(0.1176)  準確率: 0.964861173059768
訓練次數: 450  損失: tensor(0.1346)  準確率: 0.9592161016949152
訓練次數: 500  損失: tensor(0.1246)  準確率: 0.9661016949152542
CPU times: total: 1min 48s
Wall time: 1min 38s

posted @ 2025-01-31 16:03 Funsion Wu Views(27) Comments(0) 收藏舉報

刷新頁面返回頂部

Funsion Wu

修身治國平天下 & 洗臉刷牙寫代碼

Pytorch實現二分類問題

公告