PGSQL快速生成模擬數據
背景
有時候,我們為了測試數據庫的性能,通常需要快速構建測試數據,PgSql 提供了快速構建數據的工具,方便我們能夠快捷的構建模擬數據。
生成函數
順序生成
生成 SQL
-- 生成一批順序值
SELECT
id
FROM
GENERATE_SERIES(1, 10) t(id);
結果
| id |
|---|
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| 10 |
隨機數
生成 SQL
-- 生成一批隨機整型
SELECT
(RANDOM() * 100)::INT
FROM
GENERATE_SERIES(1, 10)
;
結果
| int4 |
|---|
| 66 |
| 8 |
| 11 |
| 36 |
| 53 |
| 43 |
| 22 |
| 94 |
| 99 |
| 36 |
隨機字符串
生成 SQL
-- 生成一批隨機字符串
SELECT
MD5(RANDOM()::TEXT)
FROM
GENERATE_SERIES(1, 10);
| md5 |
|---|
| 717e8603559b452af4bfd6e9631096d0 |
| 07320af223c367eb02984eb7e85d74a9 |
| d99e7ccb95873e7bf69a64c04ba5e7d3 |
| 2a6bac203480291cd1bdbaa5e8eb856f |
| 320e52646959fdc8d3f17d10e50be6e4 |
| d4e35b9bcbe466e2b5516602f7cf76cd |
| 8695b0d27430bc9a2635954bb2246b51 |
| 448e4d4985c0dac75536045abcfc21e9 |
| 32d9de8bda7c16d6d4ee4225fe9e62fa |
| 70e0092ed83668552b5304d052fcdcf0 |
隨機中文
生成隨機中文函數
-- 隨機中文函數
CREATE OR REPLACE FUNCTION gen_hanzi(INT) RETURNS TEXT AS
$$
DECLARE
res TEXT;
BEGIN
IF $1 >= 1 THEN
SELECT STRING_AGG(CHR(19968 + (RANDOM() * 20901)::INT), '') INTO res FROM GENERATE_SERIES(1, $1);
RETURN res;
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql STRICT;
生成漢字 SQL
-- 生成隨機漢字
SELECT
gen_hanzi(10)
FROM
GENERATE_SERIES(1, 10);
結果
| gen_hanzi |
|---|
| 鬁絹囬橩微鯤馴潳嫕甡 |
| 擭嫩馣倂差榦逝薍艾荰 |
| 猧炒疇稛瑵操綂愘馮粌 |
| 款應闦岷衕後槀鷰鞃絑 |
| 巹翦騷騕裰簡鑝櫒窂産 |
| 魴旻閖烀呾岨垯勣拏澥 |
| 鵅伩睰蹵臱嘦刃鍑璓冠 |
| 睛錙賃轋賁件蝔婘鰻椩 |
| 瞉斑酃惪犖桔灪濭脦娰 |
| 豟噹癀僨緯翁豃揵嗪訅 |
身份證號
生成身份證號函數
-- 隨機身份證號函數
CREATE OR REPLACE FUNCTION gen_id(
a DATE,
b DATE
)
RETURNS TEXT AS
$$
SELECT
LPAD((RANDOM() * 99)::INT::TEXT, 2, '0') ||
LPAD((RANDOM() * 99)::INT::TEXT, 2, '0') ||
LPAD((RANDOM() * 99)::INT::TEXT, 2, '0') ||
TO_CHAR(a + (RANDOM() * (b - a))::INT, 'yyyymmdd') ||
LPAD((RANDOM() * 99)::INT::TEXT, 2, '0') ||
RANDOM()::INT ||
(CASE WHEN RANDOM() * 10 > 9 THEN 'X' ELSE (RANDOM() * 9)::INT::TEXT END) ;
$$ LANGUAGE sql STRICT;
生成隨機身份證號 SQL
-- 隨機身份證號
SELECT
gen_id('1900-01-01', '2017-10-16')
FROM
GENERATE_SERIES(1, 10);
結果
| gen_id |
|---|
| 409560192602091816 |
| 163490201705024702 |
| 797974192007296218 |
| 429540201212090405 |
| 846256194810318314 |
| 244942195609190207 |
| 729623200508134305 |
| 589417196110115508 |
| 882567199310083306 |
| 67153519821129281X |
隨機日期
生成隨機日期函數
-- 生成隨機日期
CREATE OR REPLACE FUNCTION get_rand_date(start_date DATE, end_date DATE) RETURNS DATE AS
$BODY$
DECLARE
interval_days INTEGER ;
random_days INTEGER ;
random_date DATE ;
BEGIN
interval_days := end_date - start_date;
random_days := TRUNC(RANDOM() * (interval_days - 1) + 1);
random_date := start_date + random_days;
RETURN random_date;
END ;
$BODY$
LANGUAGE plpgsql;
生成隨機日期 SQL
SELECT
get_rand_date('2018-01-01', '2023-12-01')
FROM
GENERATE_SERIES(1, 10);
結果
| get_rand_date |
|---|
| 2020-06-23 |
| 2021-04-16 |
| 2019-01-18 |
| 2022-09-18 |
| 2018-06-08 |
| 2023-11-24 |
| 2022-01-25 |
| 2019-08-21 |
| 2023-06-08 |
| 2018-09-02 |
隨機時間
生成隨機時間函數
-- 生成隨機時間
CREATE OR REPLACE FUNCTION get_rand_datetime(start_date DATE, end_date DATE) RETURNS TIMESTAMP AS
$BODY$
DECLARE
interval_days INTEGER;
random_seconds INTEGER;
random_dates INTEGER;
random_date DATE;
random_time TIME;
BEGIN
interval_days := end_date - start_date;
random_dates := TRUNC(RANDOM() * interval_days);
random_date := start_date + random_dates;
random_seconds := TRUNC(RANDOM() * 3600 * 24);
random_time := ' 00:00:00'::TIME + (random_seconds || ' second')::INTERVAL;
RETURN random_date + random_time;
END;
$BODY$
LANGUAGE plpgsql;
生成隨機時間 SQL
SELECT
get_rand_datetime('2018-01-01', '2023-12-01')
FROM
GENERATE_SERIES(1, 10);
結果
| get_rand_datetime |
|---|
| 2021-06-20 20:33:32.000000 |
| 2021-10-01 18:47:55.000000 |
| 2021-07-12 00:10:05.000000 |
| 2019-03-23 18:32:06.000000 |
| 2023-04-03 20:23:25.000000 |
| 2022-07-09 10:12:32.000000 |
| 2019-12-02 20:08:22.000000 |
| 2021-11-14 06:51:23.000000 |
| 2023-05-10 07:42:13.000000 |
| 2021-09-07 23:11:36.000000 |
枚舉字符串
生成枚舉字符串函數
-- 返回隨機字符串函數
CREATE OR REPLACE FUNCTION get_split_str(str VARCHAR) RETURNS VARCHAR AS
$BODY$
DECLARE
array_str VARCHAR[];
i INT;
rand_index INT;
BEGIN
str := TRIM(str);
array_str := REGEXP_SPLIT_TO_ARRAY(str, '[,,]');
--正在表達式匹配
-- array_length(array_str,1)1代表維度,一維、二維
rand_index := CAST((RANDOM() * (ARRAY_LENGTH(array_str, 1) - 1) + 1) AS INT);
RETURN array_str[rand_index];
END ;
$BODY$
LANGUAGE plpgsql;
生成枚舉字符串 SQL
-- 返回指定的字符串
SELECT
get_split_str('hello ,zdata ,asher ,enmo,mogdb,zcloud')
FROM
GENERATE_SERIES(1, 10);
結果
| get_split_str |
|---|
| asher |
| mogdb |
| asher |
| zcloud |
| asher |
| asher |
| mogdb |
| enmo |
| asher |
| enmo |
指定范圍 INT
生成指定范圍的 INT 函數
CREATE OR REPLACE FUNCTION get_rand_int(m INT, n INT) RETURNS INT AS
$BODY$
BEGIN
RETURN CAST((RANDOM() * (n - m) + m) AS INT);
END ;
$BODY$
LANGUAGE plpgsql;
SQL
SELECT
get_rand_int(100, 1000)
FROM
GENERATE_SERIES(1, 10)
;
結果
| get_split_str |
|---|
| asher |
| mogdb |
| asher |
| zcloud |
| asher |
| asher |
| mogdb |
| enmo |
| asher |
| enmo |
指定范圍 BIGINT
生成指定范圍 BIGINT 函數
CREATE OR REPLACE FUNCTION get_rand_bigint(m BIGINT, n BIGINT) RETURNS BIGINT AS
$BODY$
BEGIN
RETURN CAST((RANDOM() * (n - m) + m) AS BIGINT);
END ;
$BODY$
LANGUAGE plpgsql;
SQL
SELECT
get_rand_bigint(100, 1000)
FROM
GENERATE_SERIES(1, 10)
;
結果
| get_rand_bigint |
|---|
| 703 |
| 958 |
| 213 |
| 835 |
| 699 |
| 152 |
| 561 |
| 832 |
| 834 |
| 478 |
生成模擬數據
-- 創建 user 數據表
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(255) NOT NULL UNIQUE,
password VARCHAR(255) NOT NULL,
id_card VARCHAR(255) NOT NULL,
email VARCHAR(255) NOT NULL UNIQUE,
phone VARCHAR(20),
address VARCHAR(255),
job_title VARCHAR(255),
education_level VARCHAR(255),
salary DECIMAL(10, 2),
hire_date DATE,
leave_date DATE,
remarks TEXT,
status VARCHAR(255),
created_at TIMESTAMP,
created_by VARCHAR(255) DEFAULT 'system'
);
-- 批量生成 1W 條模擬數據
INSERT INTO
users (username, password, id_card, email, phone, address, job_title, education_level, salary, hire_date, leave_date, remarks, status, created_at)
SELECT
SUBSTR(MD5(RANDOM()::TEXT), 10),
MD5(RANDOM()::TEXT),
gen_id('1970-01-01', '2018-10-16'),
SUBSTR(MD5(RANDOM()::TEXT), 10) || '@qq.com',
get_rand_bigint(13812341234, 19912341234),
gen_hanzi(10),
get_split_str('JAVA,C++,SQL,VUE,WEB,C#,C,GO'),
get_split_str('本科,碩士,博士,專科'),
get_rand_int(6000, 30000),
get_rand_date('2018-01-01', '2023-12-01'),
get_rand_date('2018-01-01', '2023-12-01'),
SUBSTR(MD5(RANDOM()::TEXT), 10),
get_split_str('在職,離職'),
get_rand_datetime('2018-01-01', '2023-12-01')
FROM
GENERATE_SERIES(1, 10000);

浙公網安備 33010602011771號