2023-10-25 08:34:21 +00:00
|
|
|
|
# -*- coding:utf-8 -*-
|
2023-10-25 08:38:45 +00:00
|
|
|
|
import _load
|
|
|
|
|
|
2023-10-25 08:34:21 +00:00
|
|
|
|
import faker_data
|
|
|
|
|
import csv
|
|
|
|
|
|
|
|
|
|
from schedule import every, repeat, run_pending
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from random import choice
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
说明:
|
|
|
|
|
1. 用于文件的持续创建、写入、修改和创建
|
|
|
|
|
2. 输出目录为data,会在里面创建当天的子目录
|
2024-01-08 06:27:58 +00:00
|
|
|
|
3. 每1小时,生成一个sv文件
|
|
|
|
|
4. 每1秒,向上面的文件中写入100条数据
|
2023-10-25 08:34:21 +00:00
|
|
|
|
5. 每10分钟,从当天的文件中随机找个1个文件,删除前100条数据
|
|
|
|
|
6. 每6小时,从所有的文件中随机删除1个文件
|
|
|
|
|
"""
|
2023-10-25 10:38:23 +00:00
|
|
|
|
# TODO(MH): these parameters are hard-coded for now; depending on how the script is used, they could be read from a config file later.
|
2023-10-25 08:34:21 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Shared locations used by the jobs in this script.
path = Path(__file__)
# The output directory sits next to the directory that contains this script.
data_dir = path.parent.parent.joinpath("data")

# Initialise the output directory: create it on first run and leave an
# existing one untouched.
if not data_dir.exists():
    data_dir.mkdir()
|
|
|
|
|
|
|
|
|
|
# Describe the file that belongs to the current hour.
def file_info():
    """Return naming and location details for the current hour's CSV file.

    The values are derived from ``datetime.now()`` at call time.

    Returns:
        list: five items, in this order:
            0. day stamp, formatted ``%Y-%m-%d``
            1. hour stamp, formatted ``%Y-%m-%d-%H``
            2. today's directory (``data_dir / <day stamp>``)
            3. file name (``filetest_<hour stamp>.csv``)
            4. full path of the file inside today's directory
    """
    current = datetime.now()
    day_stamp = current.strftime("%Y-%m-%d")
    hour_stamp = current.strftime("%Y-%m-%d-%H")

    day_folder = data_dir / day_stamp
    csv_name = "filetest_{}.csv".format(hour_stamp)

    return [day_stamp, hour_stamp, day_folder, csv_name, day_folder / csv_name]
|
|
|
|
|
|
2024-01-08 06:27:58 +00:00
|
|
|
|
# Generate the CSV file for the current hour.
# TODO(MH): `@repeat(every(1).hours)` is deliberately not applied here: it
# fires relative to when the process started, not on the system clock hour.
def new():
    """Create today's directory (if needed) and the current hour's CSV file.

    The target locations come from ``file_info()``: item 2 is today's
    directory and item 4 is the full path of the hourly file.
    """
    print('new...')
    finfo = file_info()
    # Create the day directory. `parents=True` also restores a missing
    # `data` directory, and `exist_ok=True` avoids the check-then-create
    # race of a separate `exists()` test.
    finfo[2].mkdir(parents=True, exist_ok=True)
    # Create the file itself.
    # NOTE(review): presumably writes 100 generated rows to the path —
    # confirm against faker_data.save_data_csv.
    faker_data.save_data_csv(finfo[4], lines=100)
|
2023-10-25 08:34:21 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Every 2 seconds, append a batch of generated rows to the current hour's file.
@repeat(every(2).seconds)
def inserting():
    """Append generated rows to the current hour's CSV file.

    If the file does not exist yet (for example right after the hour rolls
    over), nothing is appended on this run; ``new()`` is called instead so
    the file exists for the next run.
    """
    target = file_info()[4]

    if not target.exists():
        print(str(target)+" is not exists,wating")
        # Call new() by hand so the file is created as the hour changes.
        new()
        return

    print('insert...')
    # The first element of the generated data is dropped.
    # NOTE(review): presumably that element is a header row — confirm
    # against faker_data.faker_data.
    rows = faker_data.faker_data(lines=200)[1:]
    with open(target, 'a+', encoding='utf-8', newline='') as file_csv:
        csv_writer = csv.writer(
            file_csv,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_ALL,
        )
        csv_writer.writerows(rows)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Every 10 minutes, remove the first 100 lines from one of today's files.
@repeat(every(10).minutes)
def deleting_data():
    """Trim the first 100 lines from a randomly chosen file of today.

    The last file in the sorted listing is never chosen. The file is only
    rewritten when more than 100 lines would remain after trimming.
    """
    finfo = file_info()
    # Collect today's files. Path.glob yields entries in arbitrary order, so
    # sort them: the names embed a zero-padded hour stamp, which makes
    # lexicographic order chronological and puts the newest file last.
    files = sorted(finfo[2].glob('*.csv'))
    if len(files) > 1:
        # Skip the last (newest) file.
        # NOTE(review): presumably this leaves the file currently being
        # appended to alone — confirm the intent.
        file = choice(files[:-1])
        print(str(file) + "start delete data ....")
        # Read raw lines so the remaining bytes are written back unchanged.
        with open(file, 'rb') as fr:
            data = fr.readlines()
        # Drop the first 100 lines.
        # NOTE(review): if the first line is a header row, it is removed too.
        new_data = data[100:]
        # Leave the file alone unless more than 100 lines would remain.
        if len(new_data) > 100:
            with open(file, 'wb') as fw:
                fw.writelines(new_data)
    else:
        print("file number is less 1,wait next time.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Every 6 hours, delete one file; nothing is deleted when 3 or fewer exist.
@repeat(every(6).hours)
def deleting_file():
    """Delete one randomly chosen CSV file from anywhere under ``data_dir``.

    The last file in the sorted listing is never chosen, and nothing is
    deleted unless more than 3 files exist.
    """
    print("deleting file ....")
    # Path.rglob yields entries in arbitrary order, so sort the listing to
    # make "all but the last" deterministic.
    # NOTE(review): assumes every CSV under data_dir follows the
    # <day>/filetest_<day>-<hour>.csv layout, so sorted order is chronological.
    files = sorted(data_dir.rglob('*.csv'))
    if len(files) > 3:
        # Pick a victim among all files except the last (newest) one.
        file = choice(files[:-1])
        file.unlink()
    else:
        print("file num is less 3, not delete. wait next time.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Imported here so the entry point brings its own dependency into scope.
    import time

    # Run the scheduled jobs forever. Sleeping between polls keeps the loop
    # from spinning a CPU core at 100%; one second is finer than the
    # shortest job interval (2 seconds).
    while True:
        run_pending()
        time.sleep(1)
|