对于随机数是否随机问题的讨论
# 随机数测试
本次随机测试主要测试以下两个指标:
1、随机结果是否满足期望,即:100 次抽取如果 10% 的概率中奖,那么抽 100 次能中间 10 次吗
2、两次相同结果之间的出现间隔是否满足或者接近一个概率周期,即:如果 10% 概率中奖,那么每 10 次就会中奖一次
# Random.1
直接用整数随机接口
@classmethod | |
def _random_int(cls, start, end): | |
"""整数随机, range[start, end]""" | |
return random.randint(start, end) |
# Random.2
随机浮点数,再转整数
@classmethod | |
def _random_float(cls, start, end): | |
"""浮点随机, range[start, end]""" | |
float_random = random.random() | |
float_random = float_random * (end + 1 - start) + start | |
return int(float_random) |
# Random.3
先随机整数集合,再逐个抽取
@classmethod | |
def _random_choice(cls, start, end): | |
"""洗牌算法的有限集抽取, range[start, end]""" | |
if not cls.CHOICE_POOL: | |
cls.CHOICE_POOL = list(range(start, end + 1)) | |
random.shuffle(cls.CHOICE_POOL) | |
return cls.CHOICE_POOL.pop(0) |
# Random.4
先正太分布随机抽取期望,再逐个抽取,本算法参考 https://huangwei.pro/2015-07/game-random/
@classmethod | |
def _random_gass(cls, start, end): | |
"""抽取期望的正太分布随机, range[start, end]""" | |
total = end + 1 - start | |
expect_cnt_lst = [(random.normalvariate(total, total / 3), _id) | |
for _id in range(start, end + 1)] | |
heapq.heapify(expect_cnt_lst) | |
_, _id = heapq.heappop(expect_cnt_lst) | |
return _id |
# 结论
针对随机进行了四种测试
- 随机整数
random.randint
- 随机浮点数转整数
random.random
- 洗牌算法限定池随机整数
random.shuffle + pool
- 正太分布抽取期望随机
random.normalvariate or random.gauss
# 结果
case 1:同概率集合单词抽取 1 个,执行 N 次。
- ①、② 算法在 N 足够大(> total * 100)时期望符合要求
- ③、④ 算法在 N 较大(> total * 10)时期望符合要求
- ③ 在 N 足够大(> total * 100)满足频率正太分布
case 2:同概率集合单词抽取 N 个,执行 1 次。
- ①、② 算法在 N 足够大(> total * 100)时期望符合要求
- ③、④ 算法在 N 较大(> total * 10)时期望符合要求
- ③、④ 在 N 足够大(> total * 100)时频率接近正太分布
case 3:加权集合单词抽取 1 个,执行 N 次。
- ①、②、③ 在 N 足够大(> total * 100)时期望符合要求
- ④ 在有权值较大项的情况下会放大较大项的出现概率,不满足期望
- ③ 在 N 足够大(> total * 100)满足频率正太分布
case 4:加权集合单词抽取 N 个,执行 1 次。
- ①、② 算法在 N 足够大(> total * 100)时期望符合要求
- ③、④ 算法在 N 较大(> total * 10)时期望符合要求
- ③、④ 在 N 足够大(> total * 10)时频率接近正太分布
# 完整代码
import unittest | |
import random | |
import time | |
import math | |
from collections import Counter | |
import heapq | |
import matplotlib.pyplot as plt | |
""" | |
本次随机测试主要测试以下两个指标: | |
1、随机结果是否满足期望,即:100次抽取如果10%的概率中奖,那么是否确实如此 | |
2、两次相同结果之间的出现间隔是否满足或者接近一个概率周期,即:如果10%概率中奖,那么基本上没间隔10次就会中奖一次 | |
直接说结果吧,针对随机进行了四种测试 | |
①、随机整数 random.randint | |
②、随机浮点数转整数 random.random | |
③、洗牌算法限定池随机整数 random.shuffle + pool | |
④、正太分布抽取期望随机 random.normalvariate or random.gauss | |
case 1:同概率集合单词抽取 1 个,执行 N 次。 | |
①、② 算法在 N 足够大(> total * 100)时期望符合要求 | |
③、④ 算法在 N 较大(> total * 10)时期望符合要求 | |
③ 在 N 足够大(> total * 100)满足频率正太分布 | |
case 2:同概率集合单词抽取 N 个,执行 1 次。 | |
①、② 算法在 N 足够大(> total * 100)时期望符合要求 | |
③、④ 算法在 N 较大(> total * 10)时期望符合要求 | |
③、④ 在 N 足够大(> total * 100)时频率接近正太分布 | |
case 3:加权集合单词抽取 1 个,执行 N 次。 | |
①、②、③ 在 N 足够大(> total * 100)时期望符合要求 | |
④ 在有权值较大项的情况下会放大较大项的出现概率,不满足期望 | |
③ 在 N 足够大(> total * 100)满足频率正太分布 | |
case 4:加权集合单词抽取 N 个,执行 1 次。 | |
①、② 算法在 N 足够大(> total * 100)时期望符合要求 | |
③、④ 算法在 N 较大(> total * 10)时期望符合要求 | |
③、④ 在 N 足够大(> total * 10)时频率接近正太分布 | |
""" | |
def gen_random_seed(): | |
seed = list(str(int(time.time() * 7777777773))) | |
random.shuffle(seed) | |
seed = int("".join(seed)) | |
print(f"random seed {seed}") | |
return seed | |
class RandomTestCase(unittest.TestCase): | |
RANDOM_SEED = gen_random_seed() | |
CHOICE_POOL = None | |
# ================= random once ================= | |
@classmethod | |
def _random_choice(cls, start, end): | |
global xxx | |
"""洗牌算法的有限集抽取, range[start, end]""" | |
if not cls.CHOICE_POOL: | |
cls.CHOICE_POOL = list(range(start, end + 1)) | |
random.shuffle(cls.CHOICE_POOL) | |
return cls.CHOICE_POOL.pop(0) | |
@classmethod | |
def _random_float(cls, start, end): | |
"""浮点随机, range[start, end]""" | |
float_random = random.random() | |
float_random = float_random * (end + 1 - start) + start | |
return int(float_random) | |
@classmethod | |
def _random_int(cls, start, end): | |
"""整数随机, range[start, end]""" | |
return random.randint(start, end) | |
@classmethod | |
def _random_gass(cls, start, end): | |
"""抽取期望的正太分布随机, range[start, end]""" | |
total = end + 1 - start | |
expect_cnt_lst = [(random.normalvariate(total, total / 3), _id) | |
for _id in range(start, end + 1)] | |
heapq.heapify(expect_cnt_lst) | |
_, _id = heapq.heappop(expect_cnt_lst) | |
return _id | |
# ================= random multi ================= | |
@classmethod | |
def _random_choice_multi(cls, random_cnt, start, end): | |
"""洗牌算法的有限集抽取, range[start, end]""" | |
result_lst = [] | |
for _ in range(random_cnt): | |
if not cls.CHOICE_POOL: | |
cls.CHOICE_POOL = list(range(start, end + 1)) | |
random.shuffle(cls.CHOICE_POOL) | |
random_number = cls.CHOICE_POOL.pop(0) | |
result_lst.append(random_number) | |
return result_lst | |
@classmethod | |
def _random_float_multi(cls, random_cnt, start, end): | |
"""浮点随机, range[start, end]""" | |
result_lst = [] | |
for _ in range(random_cnt): | |
float_random = random.random() | |
float_random = float_random * (end + 1 - start) + start | |
result_lst.append(int(float_random)) | |
return result_lst | |
@classmethod | |
def _random_int_multi(cls, random_cnt, start, end): | |
"""整数随机, range[start, end]""" | |
return [random.randint(start, end) for _ in range(random_cnt)] | |
@classmethod | |
def _random_gass_multi(cls, random_cnt, start, end): | |
"""抽取期望的正太分布随机, range[start, end]""" | |
total = end + 1 - start | |
result_lst = [] | |
expect_cnt_lst = [(random.normalvariate(total, total / 3), _id) | |
for _id in range(start, end + 1)] | |
heapq.heapify(expect_cnt_lst) | |
for _ in range(random_cnt): | |
min_expect_cnt, _id = heapq.heappop(expect_cnt_lst) | |
result_lst.append(_id) | |
heapq.heappush(expect_cnt_lst, | |
(random.normalvariate(total, total / 3) + min_expect_cnt, _id)) | |
return result_lst | |
# ================= random weight ================= | |
@classmethod | |
def _random_choice_weight_1(cls, weight_map): | |
"""洗牌算法的有限集抽取带权重, range[start, end]""" | |
random_number = 0 | |
if not cls.CHOICE_POOL: | |
result_lst = [] | |
for _id, weight in weight_map.items(): | |
result_lst += [_id] * weight | |
cls.CHOICE_POOL = result_lst | |
random.shuffle(cls.CHOICE_POOL) | |
random_number = cls.CHOICE_POOL.pop(0) | |
return random_number | |
@classmethod | |
def _random_choice_weight_2(cls, weight_map): | |
"""洗牌算法的有限集抽取带权重, range[start, end]""" | |
return random.choices(list(weight_map.keys()), weights=list(weight_map.values()))[0] | |
@classmethod | |
def _random_float_weight(cls, weight_map): | |
"""浮点随机带权重, range[start, end]""" | |
total = sum(weight_map.values()) | |
float_random = random.random() * total | |
result = 0 | |
for _id, weight in weight_map.items(): | |
result = _id | |
if weight < float_random: | |
float_random -= weight | |
else: | |
break | |
return result | |
@classmethod | |
def _random_int_weight(cls, weight_map): | |
"""整数随机带权重, range[start, end]""" | |
total = sum(weight_map.values()) | |
int_random = random.randint(0, total - 1) | |
result = 0 | |
for _id, weight in weight_map.items(): | |
result = _id | |
if weight <= int_random: | |
int_random -= weight | |
else: | |
break | |
return result | |
@classmethod | |
def _random_gass_weight(cls, weight_map): | |
"""抽取期望的正太分布随机, range[start, end]""" | |
total = sum(weight_map.values()) | |
expect_cnt_lst = [(random.normalvariate(total / weight, total / 3 / weight), _id) | |
for _id, weight in weight_map.items()] | |
heapq.heapify(expect_cnt_lst) | |
_, _id = heapq.heappop(expect_cnt_lst) | |
return _id | |
# ================= random multi weight ================= | |
@classmethod | |
def _random_choice_multi_weight_1(cls, weight_map, cnt): | |
"""洗牌算法的有限集抽取带权重, range[start, end]""" | |
result = [] | |
for _ in range(cnt): | |
if not cls.CHOICE_POOL: | |
result_lst = [] | |
for _id, weight in weight_map.items(): | |
result_lst += [_id] * weight | |
random.shuffle(result_lst) | |
cls.CHOICE_POOL = result_lst | |
print(f"cls.CHOICE_POOL {cls.CHOICE_POOL}") | |
random_number = cls.CHOICE_POOL.pop(0) | |
result.append(random_number) | |
return result | |
@classmethod | |
def _random_choice_multi_weight_2(cls, weight_map, cnt): | |
"""抽取带权重, range[start, end]""" | |
return random.choices(list(weight_map.keys()), weights=list(weight_map.values()), k=cnt) | |
@classmethod | |
def _random_float_multi_weight(cls, weight_map, cnt): | |
"""浮点随机带权重, range[start, end]""" | |
total = sum(weight_map.values()) | |
result_lst = [] | |
for _ in range(cnt): | |
float_random = random.random() * total | |
result = 0 | |
for _id, weight in weight_map.items(): | |
result = _id | |
if weight < float_random: | |
float_random -= weight | |
else: | |
break | |
result_lst.append(result) | |
return result_lst | |
@classmethod | |
def _random_int_multi_weight(cls, weight_map, cnt): | |
"""整数随机带权重, range[start, end]""" | |
total = sum(weight_map.values()) | |
result_lst = [] | |
for _ in range(cnt): | |
int_random = random.randint(0, total - 1) | |
result = 0 | |
for _id, weight in weight_map.items(): | |
result = _id | |
if weight <= int_random: | |
int_random -= weight | |
else: | |
break | |
result_lst.append(result) | |
return result_lst | |
@classmethod | |
def _random_gass_multi_weight(cls, weight_map, cnt): | |
"""抽取期望的正太分布随机, range[start, end]""" | |
total = sum(weight_map.values()) | |
expect_cnt_lst = [(random.normalvariate(total / weight, total / 3 / weight), _id) | |
for _id, weight in weight_map.items()] | |
heapq.heapify(expect_cnt_lst) | |
result_lst = [] | |
for _ in range(cnt): | |
min_expect_cnt, _id = heapq.heappop(expect_cnt_lst) | |
result_lst.append(_id) | |
heapq.heappush( | |
expect_cnt_lst, | |
(random.normalvariate(total / weight_map[_id], total / weight_map[_id] / 3) + | |
min_expect_cnt, _id)) | |
return result_lst | |
@classmethod | |
def _draw_expect(cls, result_map, color): | |
"""统计所有抽取的结果""" | |
cls._draw_map(result_map, color) | |
@classmethod | |
def _draw_simple_interval(cls, result_lst, random_simple, color): | |
"""统计两次抽取到相同数据的间隔""" | |
interval = 0 | |
interval_lst = [] | |
for number in result_lst: | |
interval += 1 | |
if number == random_simple: | |
interval_lst.append(interval) | |
interval = 0 | |
cls._draw_list(interval_lst, color) | |
@classmethod | |
def _draw_simple_interval_distribution(cls, result_lst, random_simple, color): | |
"""间隔分布统计""" | |
distribute_count = Counter() | |
interval = 0 | |
for number in result_lst: | |
interval += 1 | |
if number == random_simple: | |
distribute_count[interval] += 1 | |
interval = 0 | |
cls._draw_map_bar(distribute_count, color) | |
@classmethod | |
def _draw_list(cls, result_lst, color): | |
plt.xlim(0, len(result_lst)) | |
plt.ylim(math.floor(min(result_lst)) - 1, math.ceil(max(result_lst)) + 1) | |
x = list(range(len(result_lst))) | |
y = result_lst | |
plt.scatter(x, y, color=color) | |
@classmethod | |
def _draw_map(cls, result_map, color): | |
plt.xlim(min(result_map.keys()) - 1, max(result_map.keys()) + 1) | |
plt.ylim(math.floor(min(result_map.values())) - 1, math.ceil(max(result_map.values())) + 1) | |
x = list(result_map.keys()) | |
y = list(result_map.values()) | |
plt.scatter(x, y, color=color) | |
@classmethod | |
def _draw_map_bar(cls, result_map, color): | |
plt.xlim(min(result_map.keys()) - 1, max(result_map.keys()) + 1) | |
plt.ylim(math.floor(min(result_map.values())) - 1, math.ceil(max(result_map.values())) + 1) | |
x = list(result_map.keys()) | |
y = list(result_map.values()) | |
plt.bar(x, y, color=color) | |
def test_random_once(self): | |
"""单次均等权重抽取 1 个,执行 N 次""" | |
random.seed(self.RANDOM_SEED) | |
random_start, random_simple, random_end, random_cnt = 0, 6, 9, 100000 | |
random_handler = { | |
self._random_float: "yellow", | |
self._random_int: "blue", | |
self._random_choice: "red", | |
self._random_gass: "green", | |
} | |
for handler, color in random_handler.items(): | |
name = handler.__name__ | |
result_lst = [] | |
for _ in range(random_cnt): | |
result_lst.append(handler(random_start, random_end)) | |
expect_val = sum(val * cnt for val, cnt in Counter(result_lst).items()) | |
cnt_total = sum(cnt for cnt in Counter(result_lst).values()) | |
print(f"handler: {name} {Counter(result_lst)} {expect_val} {cnt_total}") | |
self._draw_expect(Counter(result_lst), color) | |
plt.savefig(f'./logic/at/random_expect/{name}_expect.jpg') | |
plt.clf() | |
self._draw_simple_interval(result_lst, random_simple, color) | |
plt.savefig(f'./logic/at/random_simple_interval/{name}_simple_interval.jpg') | |
plt.clf() | |
self._draw_simple_interval_distribution(result_lst, random_simple, color) | |
plt.savefig(f'./logic/at/simple_interval_distribution/{name}_interval_distribution.jpg') | |
plt.clf() | |
def test_random_once_weight(self): | |
"""单次加权权重抽取 1 个,执行 N 次""" | |
random.seed(self.RANDOM_SEED) | |
weight_map, random_simple, random_cnt = {1: 1, 2: 8, 3: 1}, 1, 1000 | |
random_handler = { | |
self._random_float_weight: "yellow", | |
self._random_int_weight: "blue", | |
self._random_choice_weight_1: "red", | |
self._random_gass_weight: "green", | |
} | |
for handler, color in random_handler.items(): | |
name = handler.__name__ | |
result_lst = [] | |
for _ in range(random_cnt): | |
result_lst.append(handler(weight_map)) | |
expect_val = sum(val * cnt for val, cnt in Counter(result_lst).items()) | |
cnt_total = sum(cnt for cnt in Counter(result_lst).values()) | |
print(f"handler: {name} {expect_val} {cnt_total} {Counter(result_lst)}") | |
self._draw_expect(Counter(result_lst), color) | |
plt.savefig(f'./logic/at/random_expect/{name}_expect.jpg') | |
plt.clf() | |
self._draw_simple_interval(result_lst, random_simple, color) | |
plt.savefig(f'./logic/at/random_simple_interval/{name}_simple_interval.jpg') | |
plt.clf() | |
self._draw_simple_interval_distribution(result_lst, random_simple, color) | |
plt.savefig(f'./logic/at/simple_interval_distribution/{name}_interval_distribution.jpg') | |
plt.clf() | |
def test_random_multi(self): | |
"""单次均等权重抽取 N 个,执行 1 次""" | |
random.seed(self.RANDOM_SEED) | |
random_start, random_simple, random_end, random_cnt = 0, 3, 9, 100 | |
random_handler = { | |
self._random_float_multi: "yellow", | |
self._random_int_multi: "blue", | |
self._random_choice_multi: "red", | |
self._random_gass_multi: "green", | |
} | |
for handler, color in random_handler.items(): | |
name = handler.__name__ | |
result_lst = handler(random_cnt, random_start, random_end) | |
expect_val = sum(val * cnt for val, cnt in Counter(result_lst).items()) | |
cnt_total = sum(cnt for cnt in Counter(result_lst).values()) | |
print(f"handler: {name} {expect_val} {cnt_total} {Counter(result_lst)}") | |
self._draw_expect(Counter(result_lst), color) | |
plt.savefig(f'./logic/at/random_expect/{name}_random_expect.jpg') | |
plt.clf() | |
self._draw_simple_interval(result_lst, random_simple, color) | |
plt.savefig(f'./logic/at/random_simple_interval/{name}_simple_interval.jpg') | |
plt.clf() | |
self._draw_simple_interval_distribution(result_lst, random_simple, color) | |
plt.savefig(f'./logic/at/simple_interval_distribution/{name}_interval_dis.jpg') | |
plt.clf() | |
def test_random_multi_weight(self): | |
"""单次加权权重抽取 N 个,执行 1 次""" | |
random.seed(self.RANDOM_SEED) | |
weight_map, random_simple, random_cnt = {1: 1, 2: 8, 3: 1, 4: 1, 5: 89}, 2, 10000 | |
random_handler = { | |
self._random_float_multi_weight: "yellow", | |
self._random_int_multi_weight: "blue", | |
self._random_choice_multi_weight_1: "red", | |
self._random_gass_multi_weight: "green", | |
} | |
for handler, color in random_handler.items(): | |
name = handler.__name__ | |
result_lst = handler(weight_map, random_cnt) | |
expect_val = sum(val * cnt for val, cnt in Counter(result_lst).items()) | |
cnt_total = sum(cnt for cnt in Counter(result_lst).values()) | |
print(f"handler: {name} {expect_val} {cnt_total} {Counter(result_lst)}") | |
self._draw_expect(Counter(result_lst), color) | |
plt.savefig(f'./logic/at/random_expect/{name}_expect.jpg') | |
plt.clf() | |
self._draw_simple_interval(result_lst, random_simple, color) | |
plt.savefig(f'./logic/at/random_simple_interval/{name}_simple_interval.jpg') | |
plt.clf() | |
self._draw_simple_interval_distribution(result_lst, random_simple, color) | |
plt.savefig(f'./logic/at/simple_interval_distribution/{name}_interval_distribution.jpg') | |
plt.clf() |