好友
阅读权限25
听众
最后登录1970-1-1
|
完成大我
发表于 2024-7-8 18:03
本帖最后由 完成大我 于 2024-7-8 18:08 编辑
研发组里的BI小姐姐需要造测试数据,花了半小时写了这个按照指定配置生成excel随机数据表格的脚本,分享给需要的人。
1.脚本:[Python] 纯文本查看 复制代码 #!/usr/bin/env python
# -*- coding: UTF-8 -*-
import pandas as pd
import numpy as np
import json
import string
import secrets
# 定义生成随机字符串的函数
def generate_random_string(length):
letters = string.ascii_letters # 包含大小写字母的字符串
return ''.join(secrets.choice(letters) for _ in range(length))
def random_split_float(total, n):
# 生成 n-1 个随机小数并保留两位小数
random_floats = np.round(np.random.rand(n - 1) * total, 2)
# 计算这些随机小数的和
sum_of_floats = np.sum(random_floats)
# 计算最后一个小数
last_value = np.round(total - sum_of_floats, 2)
# 如果最后一个小数小于0或大于总数(极端情况),重新生成
while last_value < 0 or last_value > total:
random_floats = np.round(np.random.rand(n - 1) * total, 2)
sum_of_floats = np.sum(random_floats)
last_value = np.round(total - sum_of_floats, 2)
# 将最后一个小数加入数组
random_floats = np.append(random_floats, last_value)
return random_floats
def random_split(total, n):
# 生成 n-1 个随机点
split_points = sorted(np.random.randint(1, total, n-1))
# 将起始点和结束点加入切分点
split_points = [0] + split_points + [total]
# 计算每段的值
segments = [split_points[i+1] - split_points[i] for i in range(n)]
return segments
def genExcel():
# 自定义列字段
with open('config.json', 'r') as f:
dataj = json.load(f)
allcolumn = [column['name'] for column in dataj['columns']]
# 随机生成N行数据
N = dataj["number"] # 生成N行数据
data = {}
for column in dataj["columns"]:
if(column["type"] == 0):
data[column["name"]] = np.random.choice(column["valueArea"], N)
if(column["type"] == 1):
data[column["name"]] = np.random.randint(column["valueArea"][0], column["valueArea"][1], N)
if (column["type"] == 2):
data[column["name"]] = np.round(np.random.uniform(column["valueArea"][0], column["valueArea"][1], N),2)
if(column["type"] == 3):
if(type(column["length"]) == int):
data[column["name"]] = [generate_random_string(column["length"]) for _ in range(N)]
else:
data[column["name"]] = [generate_random_string(np.random.randint(column["length"][0], column["length"][1])) for _ in range(N)]
if(column["type"] == 4):
emum = dataj["enums"][column["parent"]]
data[column["name"]] = []
for i in range(N):
for item in emum:
if(data[column["parent"]][i] == item["value"]):
data[column["name"]].append(np.random.choice(item["children"]))
if(column["type"] == 5):
data[column["name"]] = np.round(np.random.uniform(column["valueArea"][0], column["valueArea"][1], N),2)
for i in range(len(column["parameters"])):
allcolumn.append(column["parameters"][i])
data[column["parameters"][i]] = []
for i in range(N):
paramlist = random_split_float(data[column["name"]][i],len(column["parameters"]))
for j in range(len(column["parameters"])):
data[column["parameters"][j]].append(paramlist[j])
# 创建DataFrame
df = pd.DataFrame(data, columns=allcolumn)
# 将DataFrame写入Excel文件
df.to_excel('random_data.xlsx', index=False)
2.json配置文件:
[Asm] 纯文本查看 复制代码 {
"columns": [
{
"name":"id",
"type": 3,
"length": 32
},
{
"name":"NickName",
"type": 3,
"length": [0,2]
},
{
"name":"Name",
"type": 0,
"valueArea": [
"Alice", "Bob", "Charlie", "David", "Eva"
]
},
{
"name":"Age",
"type": 1,
"valueArea": [20, 60]
},
{
"name":"City",
"type": 0,
"valueArea": [
"Alice", "Bob", "Charlie", "David", "Eva"
]
},
{
"name":"Salary",
"type": 2,
"valueArea": [30000.0, 100000.0]
},
{
"name":"Street",
"type": 4,
"parent": "City"
},
{
"name":"sum",
"type": 5,
"parameters": ["x","y","z"],
"valueArea": [5000.0, 10000.0]
}
],
"number": 1000,
"enums": {
"City":[
{
"value": "Alice",
"children": ["a","b","c"]
},
{
"value": "Bob",
"children": ["d","e","f"]
},
{
"value": "Charlie",
"children": ["g","h","i"]
},
{
"value": "David",
"children": ["j","k","l"]
},
{
"value": "Eva",
"children": ["m","n","o"]
}
]
}
}
3.配置规则说明:
type | 规则说明 | 备注 | 0 | 随机字符串,从指定的字典里随机取一个字符串,参考json文件案例 | | 1 | 随机整数,从指定的数值范围内随机取一个整数 | | 2 | 随机小数,从指定的数值范围内随机取一个小数 | | 3 | 随机长度随机字符串,根据指定的长度范围,随机生成字符串,如果length是一个整数,则生成指定长度随机字符串 | | 4 | 随机关联关系,parent指定父字段,只能是0类型,enums重配置父类型每个取值关联的值范围,比如代码案例配置文件中,父字段是City,某一行取值是Alice,那Street字段,这一行取值的范围就是a/b/c中随机 | | 5 | 随机数值分解,name定义总和的字段名,parameters定义分解字段名,valueArea定义总和的取值范围,随机生成取值范围内的值(保留2位小数),然后随机拆分成分解字段的几个值之和(保留2位小数) | | 4.脚本文件
randomExcel.zip
(2.16 KB, 下载次数: 30)
|
免费评分
-
查看全部评分
|