推排序 Verilog實現(xiàn)原理
引言
推排序常常應(yīng)用在操作系統(tǒng)的任務(wù)調(diào)度中,嘗試使用硬件對堆排序進行實現(xiàn),在實現(xiàn)的過程中不使用function和tasks語法,即真·硬件實現(xiàn)
參考的博客
也就這一個博客有介紹
堆排序的Verilog實現(xiàn)
原理
堆排序還需要復(fù)習(xí)一遍嗎? 我肯定是要的
菜鳥-堆排序
圖解排序算法(三)之堆排序
可以看到,推排序很復(fù)雜,所以我們需要祭出我們的FSM(有限狀態(tài)機)
首先,將整個堆排序分為兩個階段:
- 構(gòu)建大根堆或小根堆
- 從最后一個節(jié)點開始,和根節(jié)點交換,并維護大根堆
我們這里統(tǒng)一構(gòu)建大根堆
大根堆的構(gòu)建
直接上流程:
-
從第一個非葉子節(jié)點開始,讀取左右孩子的值;
-
比較大小,確定是否需要交換,以及交換的數(shù)據(jù);
-
寫入或不寫入,如果這個節(jié)點是根節(jié)點,那么構(gòu)建結(jié)束。
-
如果寫入了,需要對子樹進行判斷,也就是保證子樹也是大根堆
還是很簡單的,就是需要在讀寫時注意控制,以及節(jié)點編號的判斷與更改
交換數(shù)據(jù),進行排序
流程如下:
- 從最后一個節(jié)點開始;
- 交換根節(jié)點和這個節(jié)點的值;
- 維護大根堆
3.1 從根節(jié)點開始,讀取左右孩子的值;
3.2 比較大小,確定是否需要交換,以及交換的數(shù)據(jù);
3.3 寫入或不寫入,如果這個節(jié)點是葉子節(jié)點,那么維護結(jié)束。 - 如果這個節(jié)點已經(jīng)是根節(jié)點,排序結(jié)束。
我們可以發(fā)現(xiàn)在這個第三步和之前構(gòu)建的步驟是相似的,雖然我很想優(yōu)化掉它,但是能力不太夠
實現(xiàn)
這個部分最容易出現(xiàn)的問題就是讀寫,然后自己也是調(diào)試了好幾天才弄出來
頂層代碼
module top
#(
parameter ADDR_WIDTH = 12,
parameter DATA_WIDTH = 8
)
(
input clk,rst,
output [ADDR_WIDTH:1] addr1,addr2,addr3, //read addr
output [ADDR_WIDTH-1:0] w_addr1,w_addr2,
output [DATA_WIDTH-1:0] data1,data2,data3, //data
output we,re,w_ok,r_ok, //enable write and read
output [DATA_WIDTH-1:0] w_data1,w_data2 //write data
);
heap_sort hp
(.clk(clk),.rst(rst),.w_ok(w_ok),.r_ok(r_ok),.re(re),
.data1(data1),.data2(data2),.data3(data3),
.addr1(addr1),.addr2(addr2),.addr3(addr3),
.we(we),.w_data1(w_data1),.w_data2(w_data2),
.w_addr1(w_addr1),.w_addr2(w_addr2));
ram ram01
(.clk(clk),.we(we),.re(re),.w_ok(w_ok),.r_ok(r_ok),
.addr1(addr1),.addr2(addr2),.addr3(addr3),
.w_data1(w_data1),.w_data2(w_data2),
.w_addr1(w_addr1),.w_addr2(w_addr2),
.r_data1(data1),.r_data2(data2),.r_data3(data3));
endmodule
排序部分狀態(tài)機算法
module heap_sort
#(
parameter ADDR_WIDTH = 12,
parameter DATA_WIDTH = 8,
parameter FSM_STATE = 5
)
(
input clk,rst,
input [DATA_WIDTH-1:0] data1,data2,data3, //data from ram
input w_ok,r_ok, //finish read and write
output reg [ADDR_WIDTH:1] addr1,addr2,addr3, //read addr
output reg [ADDR_WIDTH-1:0] w_addr1,w_addr2,
output reg we,re, //enable write and read
output reg [DATA_WIDTH-1:0] w_data1,w_data2 //write data
);
parameter IDLE = 0;
parameter BUILD_READ = 1;
parameter BUILD_COMPARED = 2;
parameter BUILD_WRITE = 3;
parameter UPKEEP_READ = 4;
parameter UPKEEP_COMPARED = 5;
parameter UPKEEP_WRITE = 6;
parameter SORT_READY = 7;
parameter SORT_CHANGE = 8;
parameter SORT_CWRITE = 9;
parameter SORT_READ = 10;
parameter SORT_COMPARED = 11;
parameter SORT_WRITE = 12;
parameter COMPLETED = 13;
parameter ERROR = 14;
reg [FSM_STATE-1:0] state;
reg [ADDR_WIDTH-1:0] i,j,k;
reg lf;
always @(posedge clk) begin
if (rst) begin
state <= 0;
i <= 0;
j <= 0;
k <= 0;
lf <= 0;
end
else begin
case (state)
IDLE:begin
state <= BUILD_READ;
i <= ADDR_WIDTH / 2;
j <= ADDR_WIDTH;
k <= ADDR_WIDTH;
end
BUILD_READ:begin
//disable write
we = 0;
//condition of ending build
if(i)begin
state <= BUILD_COMPARED;
re = 1;
addr1 <= i;
addr2 <= 2 * i;
if(i * 2 + 1 <= ADDR_WIDTH) addr3 <= i * 2 + 1;
else addr3 <= 1;
end
else begin
state <= SORT_READY;
re <= 0;
end
end
BUILD_COMPARED:begin
// big-node
if(r_ok) begin
if(i * 2 + 1 <= ADDR_WIDTH) begin
if(data1 <= data3 && data2 <= data3) begin
w_data1 <= data3;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr3;
k <= addr3;
state = BUILD_WRITE;
end
else if(data1 <= data2 && data3 < data2) begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
k <= addr2;
state = BUILD_WRITE;
end
else begin
state = BUILD_READ;
i = i - 1;
end
end
else begin
if(data1 <= data2)begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
state = BUILD_WRITE;
end
else begin
state = BUILD_READ;
i = i - 1;
end
end
re <= 0;
end
end
BUILD_WRITE:begin
we = 1;
state <= UPKEEP_READ;
i = i - 1;
end
UPKEEP_READ:begin
//leaf node
if(k > ADDR_WIDTH / 2 ) state <= BUILD_READ;
else begin
state <= UPKEEP_COMPARED;
re = 1;
end
//disable write
we = 0;
addr1 <= k;
addr2 <= 2 * k;
if(k * 2 + 1 <= ADDR_WIDTH) addr3 <= k * 2 + 1;
else addr3 <= 1;
end
UPKEEP_COMPARED:begin
if(r_ok) begin
if(k * 2 + 1 <= ADDR_WIDTH) begin
if(data1 <= data3 && data2 <= data3) begin
w_data1 <= data3;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr3;
k <= addr3;
state <= UPKEEP_WRITE;
end
else if(data1 <= data2 && data3 < data2) begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
k <= addr2;
state <= UPKEEP_WRITE;
end
else state <= BUILD_READ;
end
else begin
if(data1 <= data2)begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
k <= ADDR_WIDTH;
state <= UPKEEP_WRITE;
end
else state <= BUILD_READ;
end
re <= 0;
end
end
UPKEEP_WRITE:begin
we = 1;
state <= UPKEEP_READ;
end
SORT_READY:begin
we = 0;
//condition of ending sort
if(j) state <= SORT_CHANGE;
else state <= COMPLETED;
re <= 1;
addr1 <= 1;
addr2 <= j;
k <= 1;
end
SORT_CHANGE:begin
if(r_ok)begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
state <= SORT_CWRITE;
re <= 0;
end
end
SORT_CWRITE: begin
we = 1;
state <= SORT_READ;
j = j - 1;
end
SORT_READ:begin
//disable write
we = 0;
if(k <= j / 2) begin
state <= SORT_COMPARED;
addr1 <= k;
addr2 <= 2 * k;
if(2 * k + 1 <= j) addr3 <= 2 * k + 1;
re <= 1;
end
else begin
state <= SORT_READY;
end
end
SORT_COMPARED:begin
// big-node
if(r_ok) begin
if(k * 2 + 1 <= j) begin
if(data1 <= data3 && data2 <= data3) begin
w_data1 <= data3;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr3;
state <= SORT_WRITE;
lf <= 1;
end
else if(data1 <= data2 && data3 < data2) begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
state <= SORT_WRITE;
lf <= 0;
end
else state <= SORT_READY;
end
else begin
if(data1 <= data2)begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
state = SORT_WRITE;
lf <= 0;
end
else state <= SORT_READY;
end
re <= 0;
end
end
SORT_WRITE:begin
we = 1;
state <= SORT_READ;
if(lf) k = k * 2 + 1;
else k = k * 2;
end
COMPLETED:begin
state <= COMPLETED;
end
default:begin
state <= ERROR;
end
endcase
end
end
endmodule
寄存器數(shù)組部分代碼
module ram
#(
parameter ADDR_WIDTH = 12,
parameter DATA_WIDTH = 8
)
(
input [ADDR_WIDTH-1:0] addr1,addr2,addr3,
input [ADDR_WIDTH-1:0] w_addr1,w_addr2,
input we,clk,re,
input [DATA_WIDTH-1:0] w_data1,w_data2,
output reg [DATA_WIDTH-1:0] r_data1,r_data2,r_data3,
output reg w_ok,r_ok
);
reg [DATA_WIDTH-1:0] data[ADDR_WIDTH:1];
initial begin
$readmemh("data.mem",data); //需要創(chuàng)建對應(yīng)的文件,用于填充數(shù)據(jù)
end
always @(posedge clk) begin
if (we ) begin
data[w_addr1] <= w_data1;
data[w_addr2] <= w_data2;
w_ok <= 1;
end
else w_ok <= 0;
if(re) begin
r_data1 <= data[addr1];
r_data2 <= data[addr2];
r_data3 <= data[addr3];
r_ok <= 1;
end
else r_ok <= 0;
end
endmodule
激勵模塊
module top_tb
#(
parameter ADDR_WIDTH = 12,
parameter DATA_WIDTH = 8
)();
reg clk,rst;
wire [ADDR_WIDTH:1] addr1,addr2,addr3; //read addr
wire [DATA_WIDTH-1:0] data1,data2,data3; //data
wire we; //enable write
wire [DATA_WIDTH-1:0] w_data1,w_data2; //write data
initial begin
clk <= 0;
rst <= 1;
#10;
clk <= 1;
#10;
rst <= 0;
forever begin
clk <= ~clk;
#50;
end
end
top tp(clk,rst,addr1,addr2,addr3,data1,data2,data3,we,w_data1,w_data2);
endmodule
缺陷
實現(xiàn)過程中,最大的缺陷是排序的數(shù)不能太多,以為I/O端口有限

浙公網(wǎng)安備 33010602011771號