dataCreating/bin/fileDataCreating.py

117 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding:utf-8 -*-
import _load
import faker_data
import csv
from schedule import every, repeat, run_pending
from pathlib import Path
from datetime import datetime
from random import choice
"""
说明:
1. 用于文件的持续创建、写入、修改和创建
2. 输出目录为data会在里面创建当天的子目录
3. 每1小时生成一个10M大小左右的csv文件
4. 每2秒向上面的文件中写入10条数据
5. 每10分钟从当天的文件中随机找个1个文件删除前100条数据
6. 每6小时从所有的文件中随机删除1个文件
"""
# TODO(MH): these parameters are hard-coded for now; depending on how the
# script ends up being used they could be read from a config file instead.
# Shared module-level paths.
# Make the script path absolute first: with a relative ``__file__`` (for
# example "fileDataCreating.py" when launched from its own directory)
# ``parent.parent`` collapses to "." and ``data`` would end up next to the
# script instead of one level above it.
path = Path(__file__).absolute()
data_dir = path.parent.parent / "data"
# Ensure the output directory exists. ``exist_ok`` replaces the racy
# "check, then create" sequence and ``parents`` tolerates a missing ancestor.
data_dir.mkdir(parents=True, exist_ok=True)
# Describe the CSV file that belongs to the current hour.
def file_info():
    """Return naming and location details for the current hour's CSV file.

    Returns:
        list: ``[day, hour, day_dir, name, full_path]`` where

        * ``day`` is the current date formatted as ``YYYY-MM-DD``,
        * ``hour`` is the current date and hour formatted as ``YYYY-MM-DD-HH``,
        * ``day_dir`` is ``data_dir / day``,
        * ``name`` is ``filetest_<hour>.csv``,
        * ``full_path`` is ``day_dir / name``.
    """
    current = datetime.now()
    day_stamp = current.strftime("%Y-%m-%d")
    hour_stamp = current.strftime("%Y-%m-%d-%H")
    day_dir = data_dir / day_stamp
    csv_name = f"filetest_{hour_stamp}.csv"
    return [day_stamp, hour_stamp, day_dir, csv_name, day_dir / csv_name]
# Generate the (roughly 10 MB) CSV file for the current hour.
# TODO(MH): not decorated with @repeat(every(1).hours) because that interval
# is measured from when the process started, not from the system clock.
def new():
    """Create today's directory if needed and generate the current hour's CSV.

    The target location comes from ``file_info()``; the file content is
    produced by ``faker_data.save_data_csv`` with ``lines=200000``.
    """
    print('new...')
    finfo = file_info()
    # Create the day directory. ``exist_ok`` avoids the check-then-create
    # race of ``exists()`` + ``mkdir()``, and ``parents`` keeps this working
    # even if the top-level data directory was removed while running.
    finfo[2].mkdir(parents=True, exist_ok=True)
    # Write the CSV file itself.
    faker_data.save_data_csv(finfo[4], lines=200000)
# Append freshly generated rows to the current hour's file every 2 seconds.
@repeat(every(2).seconds)
def inserting():
    """Append generated rows to the current hour's CSV, creating it if missing.

    When the file for the current hour does not exist yet, nothing is
    appended; ``new()`` is called instead so that the file gets created
    right when a new hour starts.
    """
    target = file_info()[4]
    if not target.exists():
        print(str(target)+" is not exists,wating")
        # Trigger creation by hand so the file shows up on the hour.
        new()
        return

    print('insert...')
    # The first element is skipped -- presumably a header row; confirm
    # against faker_data.faker_data.
    rows = faker_data.faker_data(lines=10)[1:]
    with open(target, 'a+', encoding='utf-8', newline='') as handle:
        csv.writer(
            handle,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_ALL,
        ).writerows(rows)
# Every 10 minutes drop the first 100 lines of one of today's files.
@repeat(every(10).minutes)
def deleting_data():
    """Remove the first 100 lines from a randomly chosen file of today.

    The last file in sorted order is never chosen, and nothing happens unless
    today's directory holds more than one CSV file. A file is only rewritten
    when more than 100 lines would remain after the removal.

    NOTE(review): lines are removed as raw bytes, so the first line of the
    file (possibly a CSV header) is removed as well -- confirm this is wanted.
    """
    finfo = file_info()
    # glob() yields entries in arbitrary order. Sort them so that the last
    # element is reliably the newest file (the name embeds date and hour);
    # ``files[:-1]`` then really excludes the file currently being appended
    # to, which is presumably what the slice is meant to achieve.
    files = sorted(finfo[2].glob('*.csv'))
    if len(files) > 1:
        victim = choice(files[:-1])
        print(str(victim) + "start delete data ....")
        # Read everything and keep what follows the first 100 lines.
        with open(victim, 'rb') as fr:
            remaining = fr.readlines()[100:]
        # Leave short files alone: rewrite only if more than 100 lines remain.
        if len(remaining) > 100:
            with open(victim, 'wb') as fw:
                fw.writelines(remaining)
    else:
        print("file number is less 1,wait next time.")
# Every 6 hours delete one file; nothing is deleted unless more than 3 exist.
@repeat(every(6).hours)
def deleting_file():
    """Delete one randomly chosen CSV file from anywhere under ``data_dir``.

    The last file in sorted order is never chosen, and nothing is deleted
    unless more than 3 CSV files exist.
    """
    print("deleting file ....")
    # rglob() yields entries in arbitrary order. Sort them so that the last
    # element is reliably the newest file (directory and file names embed the
    # date and hour); ``files[:-1]`` then really spares the file currently
    # being appended to, which is presumably what the slice is meant to do.
    files = sorted(data_dir.rglob('*.csv'))
    if len(files) > 3:
        choice(files[:-1]).unlink()
    else:
        print("file num is less 3, not delete. wait next time.")
if __name__ == '__main__':
    # Imported here because only the script entry point needs it.
    import time

    # Poll the scheduler forever.
    while True:
        run_pending()
        # Pause between polls so the loop does not spin a CPU core; one
        # second is below the shortest job interval used here (2 seconds).
        time.sleep(1)