一共需要两个文件
文件1 get_flush_detail.sh
#!/bin/bash
#
# get_flush_detail.sh - pull flush records out of a ceph-osd log and pass them
# to calculate.py for per-interval aggregation.
#
# Usage: sh get_flush_detail.sh osdid
#   Reads ceph-osd.<osdid>.log from the current directory, writes the
#   extracted records to input.txt, then runs calculate.py (which is also
#   expected in the current directory).
#
# The usage text tells people to run this with `sh`, so the script sticks to
# POSIX sh constructs even though the shebang names bash.

set -eu

if [ "$#" -eq 0 ]; then
  echo "usage: sh $0 osdid; for example: sh $0 3 will parse ceph-osd.3.log "
  # Printing the usage text is treated as success (exit status 0).
  exit 0
fi

log_file="ceph-osd.$1.log"

# Fail loudly instead of silently producing an empty input.txt.
if [ ! -r "$log_file" ]; then
  printf 'error: cannot read %s\n' "$log_file" >&2
  exit 1
fi

# Keep only lines that contain both "flush t" and "OK", then print the first
# field and field 13 divided by 1024*1024.
# NOTE(review): field 13 is presumably a byte count (so the result is MiB);
# confirm against the actual log format.
awk '/flush t/ && /OK/ { print $1, $13 / 1024 / 1024 }' "$log_file" > input.txt

# calculate.py uses f-strings, so it needs Python 3; fall back to plain
# `python` only when no python3 executable is on PATH.
if command -v python3 > /dev/null 2>&1; then
  python_bin=python3
else
  python_bin=python
fi

"$python_bin" calculate.py
文件2 calculate.py
import collections
import datetime
import sys

# Layout of the first input column, e.g. 2024-09-20T15:42:38.803+0800.
_TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S.%f%z'
# Width of one aggregation bucket, in seconds.
_BUCKET_SECONDS = 10


def process_data(filename):
    """Sum the values in ``filename`` per 10-second interval and print them.

    Every non-blank line must hold exactly two whitespace-separated fields:
    a timestamp in ``%Y-%m-%dT%H:%M:%S.%f%z`` form and a number. Each value
    is added to the bucket whose key is the timestamp with its seconds
    rounded down to a multiple of 10 and its microseconds cleared.

    Results go to stdout, one line per bucket in chronological order, as
    ``<bucket start> <total truncated to int>``. Lines that cannot be parsed
    are reported on stderr and skipped; blank lines are skipped silently.

    Args:
        filename: Path of the text file to read.

    Raises:
        OSError: If ``filename`` cannot be opened.
    """
    totals = collections.defaultdict(float)

    with open(filename, 'r') as file:
        for line in file:
            text = line.strip()
            if not text:
                continue

            parts = text.split()
            if len(parts) != 2:
                print(f"Warning: Skipping invalid line '{text}'", file=sys.stderr)
                continue

            timestamp_str, value_str = parts
            try:
                timestamp = datetime.datetime.strptime(timestamp_str, _TIMESTAMP_FORMAT)
                value = float(value_str)
            except ValueError:
                print(f"Warning: Skipping invalid line '{text}'", file=sys.stderr)
                continue

            # Floor the seconds to the start of the enclosing bucket.
            bucket_second = (timestamp.second // _BUCKET_SECONDS) * _BUCKET_SECONDS
            bucket = timestamp.replace(second=bucket_second, microsecond=0)
            totals[bucket] += value

    for bucket, total in sorted(totals.items()):
        print(bucket.strftime('%Y-%m-%dT%H:%M:%S%z'), int(total))


# Run only when executed as a script, so importing this module has no side
# effects.
if __name__ == '__main__':
    process_data('input.txt')
中间文件 input.txt（由 get_flush_detail.sh 生成，以下仅为部分示例行）
2024-09-20T15:42:38.803+0800 111.95
2024-09-20T15:42:39.795+0800 110.549
2024-09-20T15:42:40.793+0800 111.382
2024-09-20T15:42:41.834+0800 109.97
2024-09-20T15:42:42.882+0800 109.47
最终结果（calculate.py 的输出：每个 10 秒区间的起始时间及该区间内的合计值，基于完整的 input.txt）
2024-09-20T15:42:30+0800 222
2024-09-20T15:42:40+0800 1022
2024-09-20T15:42:50+0800 1005
2024-09-20T15:43:00+0800 889
2024-09-20T15:43:10+0800 675
2024-09-20T15:43:20+0800 442