对于随机数是否随机问题的讨论

# 随机数测试

本次随机测试主要测试以下两个指标：

1、随机结果是否满足期望，即：100 次抽取如果 10% 的概率中奖，那么抽 100 次能中间 10 次吗

2、两次相同结果之间的出现间隔是否满足或者接近一个概率周期，即：如果 10% 概率中奖，那么每 10 次就会中奖一次

# Random.1

直接用整数随机接口

	@classmethod
	def _random_int(cls, start, end):
	"""整数随机, range[start, end]"""
	return random.randint(start, end)

# Random.2

随机浮点数，再转整数

	@classmethod
	def _random_float(cls, start, end):
	"""浮点随机, range[start, end]"""
	float_random = random.random()
	float_random = float_random * (end + 1 - start) + start
	return int(float_random)

# Random.3

先随机整数集合，再逐个抽取

	@classmethod
	def _random_choice(cls, start, end):
	"""洗牌算法的有限集抽取, range[start, end]"""
	if not cls.CHOICE_POOL:
	cls.CHOICE_POOL = list(range(start, end + 1))
	random.shuffle(cls.CHOICE_POOL)
	return cls.CHOICE_POOL.pop(0)

# Random.4

先正太分布随机抽取期望，再逐个抽取，本算法参考 https://huangwei.pro/2015-07/game-random/

	@classmethod
	def _random_gass(cls, start, end):
	"""抽取期望的正太分布随机, range[start, end]"""
	total = end + 1 - start
	expect_cnt_lst = [(random.normalvariate(total, total / 3), _id)
	for _id in range(start, end + 1)]
	heapq.heapify(expect_cnt_lst)
	_, _id = heapq.heappop(expect_cnt_lst)
	return _id

# 结论

针对随机进行了四种测试

随机整数 random.randint
随机浮点数转整数 random.random
洗牌算法限定池随机整数 random.shuffle + pool
正太分布抽取期望随机 random.normalvariate or random.gauss

# 结果

case 1：同概率集合单词抽取 1 个，执行 N 次。
- ①、② 算法在 N 足够大（> total * 100）时期望符合要求
- ③、④ 算法在 N 较大（> total * 10）时期望符合要求
- ③ 在 N 足够大（> total * 100）满足频率正太分布
case 2：同概率集合单词抽取 N 个，执行 1 次。
- ①、② 算法在 N 足够大（> total * 100）时期望符合要求
- ③、④ 算法在 N 较大（> total * 10）时期望符合要求
- ③、④ 在 N 足够大（> total * 100）时频率接近正太分布
case 3：加权集合单词抽取 1 个，执行 N 次。
- ①、②、③ 在 N 足够大（> total * 100）时期望符合要求
- ④ 在有权值较大项的情况下会放大较大项的出现概率，不满足期望
- ③ 在 N 足够大（> total * 100）满足频率正太分布
case 4：加权集合单词抽取 N 个，执行 1 次。
- ①、② 算法在 N 足够大（> total * 100）时期望符合要求
- ③、④ 算法在 N 较大（> total * 10）时期望符合要求
- ③、④ 在 N 足够大（> total * 10）时频率接近正太分布

# 完整代码

	import unittest
	import random
	import time
	import math
	from collections import Counter
	import heapq
	import matplotlib.pyplot as plt

	"""
	本次随机测试主要测试以下两个指标：
	1、随机结果是否满足期望，即：100次抽取如果10%的概率中奖，那么是否确实如此
	2、两次相同结果之间的出现间隔是否满足或者接近一个概率周期，即：如果10%概率中奖，那么基本上没间隔10次就会中奖一次

	直接说结果吧，针对随机进行了四种测试
	①、随机整数 random.randint
	②、随机浮点数转整数 random.random
	③、洗牌算法限定池随机整数 random.shuffle + pool
	④、正太分布抽取期望随机 random.normalvariate or random.gauss

	case 1:同概率集合单词抽取 1 个,执行 N 次。
	①、② 算法在 N 足够大（> total * 100）时期望符合要求
	③、④ 算法在 N 较大（> total * 10）时期望符合要求
	③ 在 N 足够大（> total * 100）满足频率正太分布
	case 2:同概率集合单词抽取 N 个,执行 1 次。
	①、② 算法在 N 足够大（> total * 100）时期望符合要求
	③、④ 算法在 N 较大（> total * 10）时期望符合要求
	③、④ 在 N 足够大（> total * 100）时频率接近正太分布
	case 3:加权集合单词抽取 1 个,执行 N 次。
	①、②、③ 在 N 足够大（> total * 100）时期望符合要求
	④ 在有权值较大项的情况下会放大较大项的出现概率，不满足期望
	③ 在 N 足够大（> total * 100）满足频率正太分布
	case 4:加权集合单词抽取 N 个,执行 1 次。
	①、② 算法在 N 足够大（> total * 100）时期望符合要求
	③、④ 算法在 N 较大（> total * 10）时期望符合要求
	③、④ 在 N 足够大（> total * 10）时频率接近正太分布
	"""

	def gen_random_seed():
	seed = list(str(int(time.time() * 7777777773)))
	random.shuffle(seed)
	seed = int("".join(seed))
	print(f"random seed {seed}")
	return seed


	class RandomTestCase(unittest.TestCase):
	RANDOM_SEED = gen_random_seed()
	CHOICE_POOL = None

	# ================= random once =================
	@classmethod
	def _random_choice(cls, start, end):
	global xxx
	"""洗牌算法的有限集抽取, range[start, end]"""
	if not cls.CHOICE_POOL:
	cls.CHOICE_POOL = list(range(start, end + 1))
	random.shuffle(cls.CHOICE_POOL)
	return cls.CHOICE_POOL.pop(0)

	@classmethod
	def _random_float(cls, start, end):
	"""浮点随机, range[start, end]"""
	float_random = random.random()
	float_random = float_random * (end + 1 - start) + start
	return int(float_random)

	@classmethod
	def _random_int(cls, start, end):
	"""整数随机, range[start, end]"""
	return random.randint(start, end)

	@classmethod
	def _random_gass(cls, start, end):
	"""抽取期望的正太分布随机, range[start, end]"""
	total = end + 1 - start
	expect_cnt_lst = [(random.normalvariate(total, total / 3), _id)
	for _id in range(start, end + 1)]
	heapq.heapify(expect_cnt_lst)
	_, _id = heapq.heappop(expect_cnt_lst)
	return _id

	# ================= random multi =================
	@classmethod
	def _random_choice_multi(cls, random_cnt, start, end):
	"""洗牌算法的有限集抽取, range[start, end]"""
	result_lst = []
	for _ in range(random_cnt):
	if not cls.CHOICE_POOL:
	cls.CHOICE_POOL = list(range(start, end + 1))
	random.shuffle(cls.CHOICE_POOL)
	random_number = cls.CHOICE_POOL.pop(0)
	result_lst.append(random_number)
	return result_lst

	@classmethod
	def _random_float_multi(cls, random_cnt, start, end):
	"""浮点随机, range[start, end]"""
	result_lst = []
	for _ in range(random_cnt):
	float_random = random.random()
	float_random = float_random * (end + 1 - start) + start
	result_lst.append(int(float_random))
	return result_lst

	@classmethod
	def _random_int_multi(cls, random_cnt, start, end):
	"""整数随机, range[start, end]"""
	return [random.randint(start, end) for _ in range(random_cnt)]

	@classmethod
	def _random_gass_multi(cls, random_cnt, start, end):
	"""抽取期望的正太分布随机, range[start, end]"""
	total = end + 1 - start
	result_lst = []
	expect_cnt_lst = [(random.normalvariate(total, total / 3), _id)
	for _id in range(start, end + 1)]
	heapq.heapify(expect_cnt_lst)
	for _ in range(random_cnt):
	min_expect_cnt, _id = heapq.heappop(expect_cnt_lst)
	result_lst.append(_id)
	heapq.heappush(expect_cnt_lst,
	(random.normalvariate(total, total / 3) + min_expect_cnt, _id))
	return result_lst

	# ================= random weight =================
	@classmethod
	def _random_choice_weight_1(cls, weight_map):
	"""洗牌算法的有限集抽取带权重, range[start, end]"""
	random_number = 0
	if not cls.CHOICE_POOL:
	result_lst = []
	for _id, weight in weight_map.items():
	result_lst += [_id] * weight

	cls.CHOICE_POOL = result_lst
	random.shuffle(cls.CHOICE_POOL)
	random_number = cls.CHOICE_POOL.pop(0)
	return random_number

	@classmethod
	def _random_choice_weight_2(cls, weight_map):
	"""洗牌算法的有限集抽取带权重, range[start, end]"""
	return random.choices(list(weight_map.keys()), weights=list(weight_map.values()))[0]

	@classmethod
	def _random_float_weight(cls, weight_map):
	"""浮点随机带权重, range[start, end]"""
	total = sum(weight_map.values())
	float_random = random.random() * total
	result = 0
	for _id, weight in weight_map.items():
	result = _id
	if weight < float_random:
	float_random -= weight
	else:
	break
	return result

	@classmethod
	def _random_int_weight(cls, weight_map):
	"""整数随机带权重, range[start, end]"""
	total = sum(weight_map.values())
	int_random = random.randint(0, total - 1)
	result = 0
	for _id, weight in weight_map.items():
	result = _id
	if weight <= int_random:
	int_random -= weight
	else:
	break
	return result

	@classmethod
	def _random_gass_weight(cls, weight_map):
	"""抽取期望的正太分布随机, range[start, end]"""
	total = sum(weight_map.values())
	expect_cnt_lst = [(random.normalvariate(total / weight, total / 3 / weight), _id)
	for _id, weight in weight_map.items()]
	heapq.heapify(expect_cnt_lst)
	_, _id = heapq.heappop(expect_cnt_lst)
	return _id

	# ================= random multi weight =================
	@classmethod
	def _random_choice_multi_weight_1(cls, weight_map, cnt):
	"""洗牌算法的有限集抽取带权重, range[start, end]"""
	result = []
	for _ in range(cnt):
	if not cls.CHOICE_POOL:
	result_lst = []
	for _id, weight in weight_map.items():
	result_lst += [_id] * weight
	random.shuffle(result_lst)
	cls.CHOICE_POOL = result_lst
	print(f"cls.CHOICE_POOL {cls.CHOICE_POOL}")
	random_number = cls.CHOICE_POOL.pop(0)
	result.append(random_number)
	return result

	@classmethod
	def _random_choice_multi_weight_2(cls, weight_map, cnt):
	"""抽取带权重, range[start, end]"""
	return random.choices(list(weight_map.keys()), weights=list(weight_map.values()), k=cnt)

	@classmethod
	def _random_float_multi_weight(cls, weight_map, cnt):
	"""浮点随机带权重, range[start, end]"""
	total = sum(weight_map.values())
	result_lst = []
	for _ in range(cnt):
	float_random = random.random() * total
	result = 0
	for _id, weight in weight_map.items():
	result = _id
	if weight < float_random:
	float_random -= weight
	else:

	break
	result_lst.append(result)
	return result_lst

	@classmethod
	def _random_int_multi_weight(cls, weight_map, cnt):
	"""整数随机带权重, range[start, end]"""
	total = sum(weight_map.values())
	result_lst = []
	for _ in range(cnt):
	int_random = random.randint(0, total - 1)
	result = 0
	for _id, weight in weight_map.items():
	result = _id
	if weight <= int_random:
	int_random -= weight
	else:
	break
	result_lst.append(result)
	return result_lst

	@classmethod
	def _random_gass_multi_weight(cls, weight_map, cnt):
	"""抽取期望的正太分布随机, range[start, end]"""
	total = sum(weight_map.values())
	expect_cnt_lst = [(random.normalvariate(total / weight, total / 3 / weight), _id)
	for _id, weight in weight_map.items()]
	heapq.heapify(expect_cnt_lst)
	result_lst = []
	for _ in range(cnt):
	min_expect_cnt, _id = heapq.heappop(expect_cnt_lst)
	result_lst.append(_id)
	heapq.heappush(
	expect_cnt_lst,
	(random.normalvariate(total / weight_map[_id], total / weight_map[_id] / 3) +
	min_expect_cnt, _id))
	return result_lst

	@classmethod
	def _draw_expect(cls, result_map, color):
	"""统计所有抽取的结果"""
	cls._draw_map(result_map, color)

	@classmethod
	def _draw_simple_interval(cls, result_lst, random_simple, color):
	"""统计两次抽取到相同数据的间隔"""
	interval = 0
	interval_lst = []
	for number in result_lst:
	interval += 1
	if number == random_simple:
	interval_lst.append(interval)
	interval = 0
	cls._draw_list(interval_lst, color)

	@classmethod
	def _draw_simple_interval_distribution(cls, result_lst, random_simple, color):
	"""间隔分布统计"""
	distribute_count = Counter()
	interval = 0
	for number in result_lst:
	interval += 1
	if number == random_simple:
	distribute_count[interval] += 1
	interval = 0
	cls._draw_map_bar(distribute_count, color)

	@classmethod
	def _draw_list(cls, result_lst, color):
	plt.xlim(0, len(result_lst))
	plt.ylim(math.floor(min(result_lst)) - 1, math.ceil(max(result_lst)) + 1)
	x = list(range(len(result_lst)))
	y = result_lst
	plt.scatter(x, y, color=color)

	@classmethod
	def _draw_map(cls, result_map, color):
	plt.xlim(min(result_map.keys()) - 1, max(result_map.keys()) + 1)
	plt.ylim(math.floor(min(result_map.values())) - 1, math.ceil(max(result_map.values())) + 1)
	x = list(result_map.keys())
	y = list(result_map.values())
	plt.scatter(x, y, color=color)

	@classmethod
	def _draw_map_bar(cls, result_map, color):
	plt.xlim(min(result_map.keys()) - 1, max(result_map.keys()) + 1)
	plt.ylim(math.floor(min(result_map.values())) - 1, math.ceil(max(result_map.values())) + 1)
	x = list(result_map.keys())
	y = list(result_map.values())
	plt.bar(x, y, color=color)

	def test_random_once(self):
	"""单次均等权重抽取 1 个，执行 N 次"""
	random.seed(self.RANDOM_SEED)
	random_start, random_simple, random_end, random_cnt = 0, 6, 9, 100000
	random_handler = {
	self._random_float: "yellow",
	self._random_int: "blue",
	self._random_choice: "red",
	self._random_gass: "green",
	}
	for handler, color in random_handler.items():
	name = handler.__name__
	result_lst = []
	for _ in range(random_cnt):
	result_lst.append(handler(random_start, random_end))
	expect_val = sum(val * cnt for val, cnt in Counter(result_lst).items())
	cnt_total = sum(cnt for cnt in Counter(result_lst).values())
	print(f"handler: {name} {Counter(result_lst)} {expect_val} {cnt_total}")
	self._draw_expect(Counter(result_lst), color)
	plt.savefig(f'./logic/at/random_expect/{name}_expect.jpg')
	plt.clf()
	self._draw_simple_interval(result_lst, random_simple, color)
	plt.savefig(f'./logic/at/random_simple_interval/{name}_simple_interval.jpg')
	plt.clf()
	self._draw_simple_interval_distribution(result_lst, random_simple, color)
	plt.savefig(f'./logic/at/simple_interval_distribution/{name}_interval_distribution.jpg')
	plt.clf()

	def test_random_once_weight(self):
	"""单次加权权重抽取 1 个，执行 N 次"""
	random.seed(self.RANDOM_SEED)
	weight_map, random_simple, random_cnt = {1: 1, 2: 8, 3: 1}, 1, 1000
	random_handler = {
	self._random_float_weight: "yellow",
	self._random_int_weight: "blue",
	self._random_choice_weight_1: "red",
	self._random_gass_weight: "green",
	}
	for handler, color in random_handler.items():
	name = handler.__name__
	result_lst = []
	for _ in range(random_cnt):
	result_lst.append(handler(weight_map))
	expect_val = sum(val * cnt for val, cnt in Counter(result_lst).items())
	cnt_total = sum(cnt for cnt in Counter(result_lst).values())
	print(f"handler: {name} {expect_val} {cnt_total} {Counter(result_lst)}")
	self._draw_expect(Counter(result_lst), color)
	plt.savefig(f'./logic/at/random_expect/{name}_expect.jpg')
	plt.clf()
	self._draw_simple_interval(result_lst, random_simple, color)
	plt.savefig(f'./logic/at/random_simple_interval/{name}_simple_interval.jpg')
	plt.clf()
	self._draw_simple_interval_distribution(result_lst, random_simple, color)
	plt.savefig(f'./logic/at/simple_interval_distribution/{name}_interval_distribution.jpg')
	plt.clf()

	def test_random_multi(self):
	"""单次均等权重抽取 N 个，执行 1 次"""
	random.seed(self.RANDOM_SEED)
	random_start, random_simple, random_end, random_cnt = 0, 3, 9, 100
	random_handler = {
	self._random_float_multi: "yellow",
	self._random_int_multi: "blue",
	self._random_choice_multi: "red",
	self._random_gass_multi: "green",
	}
	for handler, color in random_handler.items():
	name = handler.__name__
	result_lst = handler(random_cnt, random_start, random_end)
	expect_val = sum(val * cnt for val, cnt in Counter(result_lst).items())
	cnt_total = sum(cnt for cnt in Counter(result_lst).values())
	print(f"handler: {name} {expect_val} {cnt_total} {Counter(result_lst)}")
	self._draw_expect(Counter(result_lst), color)
	plt.savefig(f'./logic/at/random_expect/{name}_random_expect.jpg')
	plt.clf()
	self._draw_simple_interval(result_lst, random_simple, color)
	plt.savefig(f'./logic/at/random_simple_interval/{name}_simple_interval.jpg')
	plt.clf()
	self._draw_simple_interval_distribution(result_lst, random_simple, color)
	plt.savefig(f'./logic/at/simple_interval_distribution/{name}_interval_dis.jpg')
	plt.clf()

	def test_random_multi_weight(self):
	"""单次加权权重抽取 N 个，执行 1 次"""
	random.seed(self.RANDOM_SEED)
	weight_map, random_simple, random_cnt = {1: 1, 2: 8, 3: 1, 4: 1, 5: 89}, 2, 10000
	random_handler = {
	self._random_float_multi_weight: "yellow",
	self._random_int_multi_weight: "blue",
	self._random_choice_multi_weight_1: "red",
	self._random_gass_multi_weight: "green",
	}
	for handler, color in random_handler.items():
	name = handler.__name__
	result_lst = handler(weight_map, random_cnt)
	expect_val = sum(val * cnt for val, cnt in Counter(result_lst).items())
	cnt_total = sum(cnt for cnt in Counter(result_lst).values())
	print(f"handler: {name} {expect_val} {cnt_total} {Counter(result_lst)}")
	self._draw_expect(Counter(result_lst), color)
	plt.savefig(f'./logic/at/random_expect/{name}_expect.jpg')
	plt.clf()
	self._draw_simple_interval(result_lst, random_simple, color)
	plt.savefig(f'./logic/at/random_simple_interval/{name}_simple_interval.jpg')
	plt.clf()
	self._draw_simple_interval_distribution(result_lst, random_simple, color)
	plt.savefig(f'./logic/at/simple_interval_distribution/{name}_interval_distribution.jpg')
	plt.clf()

随机数