faker_diary/bin/faker_diary.py

134 lines
4.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
import jinja2
import datetime
import subprocess
import random
import shutil
from pathlib import Path
from faker import Faker
from chinese_calendar import is_workday
# Directory that contains this script.
NOW = Path(__file__).parent
# Sibling "etc" directory; main() looks for the Jinja2 template here.
ETC = NOW.parent.joinpath("etc")
# Sibling "output" directory; main() wipes and refills it on every run.
OUTPUT = NOW.parent.joinpath("output")
def faker_datas(number=7):
    """
    Build the per-day records that the diary template is rendered from.

    Walks back ``number`` calendar days starting with today and produces one
    record for every day that ``is_workday`` reports as a working day.
    Non-working days are skipped but still count towards ``number``, so the
    result may contain fewer than ``number`` records.

    Args:
        number: How many calendar days to look back, today included.

    Returns:
        A list of dicts ordered from the most recent day backwards. Each dict
        has the keys ``create_date`` (date), ``CreationTime``,
        ``LastWriteTime``, ``LastAccessTime`` (datetimes) and ``status1``
        (the key chosen by ``random_weight``).
    """
    fake = Faker(locale="zh_CN")

    # Outcome weights: 80 for success, 20 for failure. Built once because it
    # never changes between days.
    # NOTE(review): "sucess" is misspelled, but it is a runtime value that the
    # template may match on, so it is kept as-is -- confirm before renaming.
    status_weights = {"sucess": 80, "failed": 20}

    # The creation time is drawn from the 08:00-11:00 window of each day.
    window_start = datetime.time(8, 0, 0)
    window_end = datetime.time(11, 0, 0)

    today = datetime.datetime.now().date()
    datas = []
    for n in range(number):
        # The day n days before today (n == 0 is today itself).
        date = today - datetime.timedelta(days=n)
        # Only working days get a diary entry.
        if not is_workday(date):
            continue

        # Combine the date with the window bounds directly instead of
        # formatting to a string and parsing it back.
        creation_time = fake.date_time_between(
            start_date=datetime.datetime.combine(date, window_start),
            end_date=datetime.datetime.combine(date, window_end),
        )
        # Last write happens 6-10 hours (plus 1-300 seconds) after creation.
        last_write_time = creation_time + datetime.timedelta(
            hours=fake.pyint(min_value=6, max_value=10),
            seconds=fake.pyint(min_value=1, max_value=300),
        )
        # Last access happens 0-n days (plus 1-300 seconds) after the last
        # write, where n is how many days back this entry lies.
        last_access_time = last_write_time + datetime.timedelta(
            days=fake.pyint(min_value=0, max_value=n),
            seconds=fake.pyint(min_value=1, max_value=300),
        )

        datas.append(
            {
                "create_date": date,
                "CreationTime": creation_time,
                "LastWriteTime": last_write_time,
                "LastAccessTime": last_access_time,
                "status1": random_weight(status_weights),
            }
        )
    return datas
def random_weight(weight_data):
    """
    Pick one key of ``weight_data`` at random, in proportion to its weight.

    Example:
        weight_data = {'a': 10, 'b': 40, 'c': 50}
        random_weight(weight_data)

    Args:
        weight_data: Mapping of candidate value -> numeric weight.

    Returns:
        The selected key, or ``None`` when no key is reached (for instance
        when ``weight_data`` is empty).
    """
    # Draw a point somewhere between 0 and the sum of all weights.
    threshold = random.uniform(0, sum(weight_data.values()))

    # Walk the keys in order, accumulating weights; the first key whose
    # running total reaches the drawn point is the winner.
    running_total = 0
    for key, weight in weight_data.items():
        running_total += weight
        if threshold <= running_total:
            return key
    return None
def _set_file_times(path, created, modified, accessed):
    """
    Overwrite a file's creation, last-write and last-access timestamps.

    Runs ``powershell.exe`` to assign the three properties. The step is
    best-effort: if the executable cannot be found, a message is printed to
    stderr and the file is left as it is.

    Args:
        path: File whose timestamps are rewritten.
        created: datetime to store as CreationTime.
        modified: datetime to store as LastWriteTime.
        accessed: datetime to store as LastAccessTime.

    Returns:
        The PowerShell exit status, or ``None`` if PowerShell was not found.
    """
    import sys

    stamp = "%m/%d/%Y %H:%M:%S"
    # Inside a PowerShell single-quoted string a quote is escaped by doubling
    # it; -LiteralPath keeps characters such as [ ] from acting as wildcards.
    literal_path = str(path).replace("'", "''")
    script = (
        f"Get-Item -LiteralPath '{literal_path}' | ForEach-Object {{ "
        f"$_.CreationTime = '{created.strftime(stamp)}'; "
        f"$_.LastWriteTime = '{modified.strftime(stamp)}'; "
        f"$_.LastAccessTime = '{accessed.strftime(stamp)}' }}"
    )
    try:
        # An argument list (no shell=True) avoids a second layer of shell
        # quoting around the script.
        return subprocess.call(["powershell.exe", "-Command", script])
    except FileNotFoundError:
        # Without an intermediate shell a missing executable raises instead of
        # returning a non-zero status; keep the step non-fatal.
        print(
            "powershell.exe not found; timestamps of %s left unchanged" % path,
            file=sys.stderr,
        )
        return None


def main(number):
    """
    Generate one Markdown diary file per working day and back-date it.

    Fetches the day records from ``faker_datas(number)``, recreates the
    ``OUTPUT`` directory from scratch, renders ``ETC / "template.md.j2"`` once
    per record into ``OUTPUT / "<YYYY-MM-DD>.md"``, and finally rewrites each
    file's creation / write / access timestamps to the values in the record.

    Args:
        number: Number of calendar days to look back, passed to
            ``faker_datas``.
    """
    datas = faker_datas(number)

    # Always start from an empty output directory.
    if OUTPUT.exists():
        shutil.rmtree(OUTPUT)
    OUTPUT.mkdir()

    # Nothing to render: leave the empty directory and do not touch the
    # template.
    if not datas:
        return

    # The template is the same for every day, so read and compile it once
    # (read_text also closes the file handle).
    template_file = ETC / "template.md.j2"
    template = jinja2.Environment().from_string(
        template_file.read_text(encoding="utf-8")
    )

    for day_log in datas:
        rendered = template.render({"datas": day_log})

        file_name = day_log["create_date"].strftime("%Y-%m-%d") + ".md"
        output_file = OUTPUT / file_name
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(rendered)

        # Make the file look as if it was written on the day it describes.
        _set_file_times(
            output_file,
            day_log["CreationTime"],
            day_log["LastWriteTime"],
            day_log["LastAccessTime"],
        )
if __name__ == "__main__":
    # Script entry point: generate diary files covering the last 30 days.
    main(number=30)