Issue
I want to plot a graph showing machine status over time (for example, running time in green and idle time in blue). The only input I have is a log file, such as the txt file below. Running time is defined as the time while a unit is running (from the start time to the end time of each SN), plus any gap between the current unit's end time and the next unit's start time that is less than 80 s. I don't know how to deal with multiple levels (year, month, day, hour, ...) or how to minimize the dataframes/arrays needed to handle the data before plotting. Kindly advise me on this case:
full_log.txt content:
SerialNumber,Tester,Process_step,Start_time,End_time,Test_result
23082900161,tester1,test,2023-08-30 09:35:52,2023-08-30 09:36:15,P
23082900161,tester1,test,2023-08-30 09:39:37,2023-08-30 09:40:00,F
23082900161,tester1,test,2023-08-30 09:41:37,2023-08-30 09:42:00,F
23082900161,tester1,test,2023-08-30 11:41:37,2023-08-30 11:42:00,F
231127501930,tester1,test,2023-12-07 12:19:25,2023-12-07 12:19:51,F
231127501900,tester1,test,2023-12-07 14:04:46,2023-12-07 14:05:11,P
231127501900,tester1,test,2023-12-07 14:04:46,2023-12-07 14:05:11,P
231127501920,tester1,test,2023-12-07 14:12:51,2023-12-07 14:13:18,P
231127501920,tester1,test,2023-12-07 14:12:51,2023-12-07 14:13:18,P
231127501920,tester1,test,2023-12-07 14:12:51,2023-12-07 14:13:18,P
231127501930,tester1,test,2023-12-07 14:19:25,2023-12-07 14:19:51,P
231127501930,tester1,test,2023-12-07 14:19:25,2023-12-07 14:19:51,P
23082900141,tester1,test,2023-12-14 09:35:52,2023-12-14 09:36:15,P
231127501220,tester1,test,2023-12-14 14:12:51,2023-12-14 14:13:18,P
23082900161,tester1,test,2023-12-30 09:30:37,2023-08-30 09:30:42,F
23082900171,tester1,test,2023-12-30 09:30:37,2023-08-30 09:30:42,F
23082900171,tester1,test,2023-12-15 09:41:37,2023-12-15 09:42:00,P
23082900161,tester1,test,2023-08-30 09:41:37,2023-08-30 09:42:00,F
Here is my code and explanation:
from tkinter import *
import pandas as pd
class gui():
    '''
    Tkinter front end for the machine monitor.

    Loads full_log.txt into a DataFrame and offers a "show chart" button
    whose handler is meant to fill running_arr / idle_arr for plotting.
    '''

    def __init__(self, container):
        '''
        Configure the window, load the log file and create the status arrays.

        container: the Tk window (or similar object exposing title() and
        geometry()) that hosts the widgets.
        '''
        self.container = container
        self.container.title('Machine monitor')
        self.container.geometry('800x600')
        # Start_time and End_time are handed to pandas for date parsing.
        self.df=pd.read_csv("full_log.txt",parse_dates=['Start_time','End_time'])
        # One slot per minute; 0 is the initial value, show_graph writes 1.
        self.running_arr=self.create_60_min_template()
        self.idle_arr=[]
        # NOTE(review): stored here, but show_graph compares against the
        # literal 80 instead of reading this attribute.
        self.define_time_gap=80
        print(self.running_arr)

    def set_up(self):
        '''Create and pack the frame and the button that calls show_graph.'''
        self.function_frame = Frame(self.container, borderwidth=2, height="300", width="600",
                                    highlightbackground="black",
                                    highlightcolor="red", highlightthickness=1)
        self.function_frame.pack()
        self.show_chart=Button(self.function_frame, text='show chart',
                               command= self.show_graph)
        self.show_chart.pack()

    def create_60_min_template(self):
        '''Return a new list holding 60 zeros.'''
        arr=[]
        for i in range(60):
            arr.append(0)
        return arr

    def show_graph(self):
        '''
        Mark running slots in running_arr and derive idle_arr from it.

        i don't know how to handle to at level year, month, day, hour.
        assume i have code for those level here
        .......
        Now i process minutes level:
        '''
        for i in range(len(self.df['SerialNumber'])):
            start_time = self.df.iloc[i]['Start_time']
            end_time = self.df.iloc[i]['End_time']
            print(start_time)
            print(end_time)
            # get minute from start time to end time of each unit and mark it is 1 in running arr
            # NOTE(review): range() only accepts integers, but start_time and
            # end_time come straight from the columns that __init__ asks
            # pandas to parse as dates, so this presumably raises TypeError;
            # confirm. The loop also rebinds `i`, the outer row index.
            for i in range(start_time,end_time):
                self.running_arr[i]=1
            # NOTE(review): `i` is whatever the inner loop left behind, and
            # `i+1` is not bounds-checked for the last row of the DataFrame.
            time_gap=(self.df.iloc[i+1]['Start_time']-self.df.iloc[i]['End_time']).total_seconds()
            #accept the gap <=80s , consider it is running time as well
            if time_gap<=80:
                # NOTE(review): this re-marks the unit's own start..end span,
                # not the gap between this unit and the next one.
                start_time = self.df.iloc[i]['Start_time']
                end_time = self.df.iloc[i]['End_time']
                for i in range(start_time, end_time):
                    self.running_arr[i] = 1
        #get the idle time by opposite the value of running time:
        self.idle_arr = [1 if value == 0 else 0 for value in self.running_arr]
        '''
        here i will plot the runnign_arr and idle_arr in same chart , running time is green, and idle is blue
        '''
if __name__ == "__main__":
    # Build the Tk root window, attach the monitor GUI to it, then hand
    # control over to the Tk event loop.
    root = Tk()
    app = gui(root)
    app.set_up()
    root.mainloop()
Solution
It looks like you need to use datetime.strptime(date_string, format) to convert the time-stamp strings from your log file to datetime objects. Then you can do anything you want with those objects, e.g. calculate the difference between them, use them for plotting something on a timescale, etc. However, there is a serious issue with your log file: some lines are duplicated, some contain erroneous timestamps where the start time is later than the end time, and so on. So I took only these first 3 lines
SerialNumber | Tester | Process_step | Start_time | End_time | Test_result |
---|---|---|---|---|---|
23082900161 | tester1 | test | 2023-08-30 09:35:52 | 2023-08-30 09:36:15 | P |
23082900161 | tester1 | test | 2023-08-30 09:39:37 | 2023-08-30 09:40:00 | F |
23082900161 | tester1 | test | 2023-08-30 09:41:37 | 2023-08-30 09:42:00 | F |
to illustrate how the respective time intervals can be displayed:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates # used for nice formatting of dates
# Plot every logged test run as a green band, and every gap between two
# consecutive runs as a green band (gap of at most 80 s, counted as running
# time) or a blue band (longer gap, counted as idle time).
#
# NOTE: gaps are measured between consecutive rows exactly as they appear in
# the file, so the log is expected to be sorted by start time and free of
# rows whose end time precedes their start time.

# read data from the CSV file
data = pd.read_csv('full_log.txt', delimiter=',')
# get all start time values
start_times = list(data['Start_time'])
# get all end time values
end_times = list(data['End_time'])
time_format = "%Y-%m-%d %H:%M:%S"

# convert every time-stamp string to a datetime object exactly once
starts = [datetime.strptime(s, time_format) for s in start_times]
ends = [datetime.strptime(e, time_format) for e in end_times]

fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot()
ax.set_ylim([0, 2])
# y coordinates of the first curve limiting filled areas
y1 = [0, 0]
# y coordinates of the second curve limiting filled areas
y2 = [1, 1]
times_num = len(start_times)

# loop through all the time pairs and plot a filled area for each.
# enumerate() supplies the row position directly; looking it up with
# start_times.index(s) would return the FIRST row with that start time and
# therefore pick the wrong "next" row whenever start times repeat.
for index, (start, end) in enumerate(zip(starts, ends)):
    # fill the current time area with green
    ax.fill_between([start, end], y1, y2, fc='green', edgecolor='white')
    # check if there can be a trailing gap (not after the last time pair)
    if index != times_num - 1:
        next_start = starts[index + 1]
        gap_s = (next_start - end).total_seconds()
        # a gap no longer than 80 s is green (running), a longer one is blue (idle)
        gap_color = 'green' if gap_s <= 80 else 'blue'
        ax.fill_between([end, next_start], y1, y2, fc=gap_color, edgecolor='white')
        # report the data about gaps
        print('{0}) {1} --> {2} --> {3}, gap: {4}'.format(index, start, end, next_start, gap_s))

ax.yaxis.set_visible(False)
ax.set_xlabel('Time', loc='right')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%b-%d %H:%M:%S'))
plt.setp(ax.get_xticklabels(), rotation=20, ha="right")
fig.tight_layout()
plt.show()
This is what I got:
If you plot all the time intervals from the log file (after discarding the duplicate and incorrect ones), you will get something looking like a long blue rectangle with tiny green stripes, almost invisible without zooming in. So the blue gaps are really long - maybe they should be visualized in some other way.
If you need to show that all time intervals are composed of some "ticks" of equal duration, then the code above could be modified like this:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates # used for nice formatting of dates
from datetime import timedelta
# Plot every logged test run and every gap between consecutive runs as a
# sequence of one-second "ticks": green for running time (runs, and gaps of
# at most 80 s), blue for idle time (longer gaps).
#
# NOTE: gaps are measured between consecutive rows exactly as they appear in
# the file, so the log is expected to be sorted by start time. One patch is
# drawn per second, so very long gaps produce a very large number of patches.

# read data from the CSV file
data = pd.read_csv('full_log.txt', delimiter=',')
# get all start time values
start_times = list(data['Start_time'])
# get all end time values
end_times = list(data['End_time'])
time_format = "%Y-%m-%d %H:%M:%S"

# convert every time-stamp string to a datetime object exactly once
starts = [datetime.strptime(s, time_format) for s in start_times]
ends = [datetime.strptime(e, time_format) for e in end_times]

fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot()
ax.set_ylim([0, 2])
# y coordinates of the first curve limiting filled areas
y1 = [0, 0]
# y coordinates of the second curve limiting filled areas
y2 = [1, 1]
times_num = len(start_times)

# loop through all the time pairs and plot one filled area per second.
# enumerate() supplies the row position directly; looking it up with
# start_times.index(s) would return the FIRST row with that start time and
# therefore pick the wrong "next" row whenever start times repeat.
for index, (start, end) in enumerate(zip(starts, ends)):
    # fill the current time area with green, one tick per second
    duration_s = (end - start).total_seconds()
    for t in range(round(duration_s)):
        x = [start + timedelta(seconds=t), start + timedelta(seconds=t + 1)]
        ax.fill_between(x, y1, y2, facecolor='green', edgecolor='white')
    # check if there can be a trailing gap (not after the last time pair)
    if index != times_num - 1:
        next_start = starts[index + 1]
        gap_s = (next_start - end).total_seconds()
        # a gap no longer than 80 s is green (running), a longer one is blue (idle)
        gap_color = 'green' if gap_s <= 80 else 'blue'
        for t in range(round(gap_s)):
            x = [end + timedelta(seconds=t), end + timedelta(seconds=t + 1)]
            ax.fill_between(x, y1, y2, facecolor=gap_color, edgecolor='white')
        # report the data about gaps
        print('{0}) {1} --> {2} --> {3}, gap: {4}'.format(index, start, end, next_start, gap_s))

ax.yaxis.set_visible(False)
ax.set_xlabel('Time', loc='right')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%b-%d %H:%M:%S'))
plt.setp(ax.get_xticklabels(), rotation=20, ha="right")
fig.tight_layout()
plt.show()
Which, if you zoom in the leftmost part, will look like this:
One "tick" here equals 1 second. Surely this visualization is not suitable for very long time intervals, since all these "ticks" are going to be lumped together.
P.S.
If you don't need to represent the time realistically, then you could simply generate binary arrays indicating active (1) or idle (0) status, and plot them as vertical bars:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import math
# Build binary arrays marking each time slot as active (1) or idle (0), at
# second and at minute resolution, and plot them as green/blue bars.
#
# NOTE: gaps are measured between consecutive rows exactly as they appear in
# the file, so the log is expected to be sorted by start time. Every second
# of every run and gap becomes one list element and one bar, so multi-day
# gaps produce very large arrays. Rows whose end precedes their start (and
# negative gaps) contribute no slots, because a list times a negative count
# is empty.

# read data from the CSV file
data = pd.read_csv('full_log.txt', delimiter=',')
# get all start time values
start_times = list(data['Start_time'])
# get all end time values
end_times = list(data['End_time'])
time_format = "%Y-%m-%d %H:%M:%S"

# convert every time-stamp string to a datetime object exactly once
starts = [datetime.strptime(s, time_format) for s in start_times]
ends = [datetime.strptime(e, time_format) for e in end_times]

# binary arrays indicating active time slots in seconds and minutes
active_s = []
active_m = []
times_num = len(start_times)

# loop through all the time pairs and append their slots to the arrays.
# enumerate() supplies the row position directly; looking it up with
# start_times.index(s) would return the FIRST row with that start time and
# therefore pick the wrong "next" row whenever start times repeat.
for index, (start, end) in enumerate(zip(starts, ends)):
    # total_seconds() covers the whole span; timedelta.seconds would drop
    # the days component and wrap around for negative spans
    duration_s = int((end - start).total_seconds())
    # round up so that any started minute counts as a whole minute
    duration_m = math.ceil(duration_s / 60)
    active_s.extend([1] * duration_s)
    active_m.extend([1] * duration_m)
    # check if there can be a trailing gap (not after the last time pair)
    if index != times_num - 1:
        next_start = starts[index + 1]
        gap_s = int((next_start - end).total_seconds())
        # round up so that any started minute counts as a whole minute
        gap_m = math.ceil(gap_s / 60)
        # gaps no longer than 80 s are regarded as active time, longer ones as idle
        gap_value = 1 if gap_s <= 80 else 0
        active_s.extend([gap_value] * gap_s)
        active_m.extend([gap_value] * gap_m)

# idle arrays are inverted versions of active arrays
idle_s = [1 - value for value in active_s]
idle_m = [1 - value for value in active_m]

width = 0.8  # bar width in charts
fig = plt.figure(figsize=(12, 6))

# show active and idle time slots in second-resolution
ax_s = fig.add_subplot(2, 1, 1)
ax_s.set_ylim([0, 2])
ticks_s = list(range(len(active_s)))
ax_s.bar(ticks_s, active_s, width, color='green')
ax_s.bar(ticks_s, idle_s, width, color='blue')
ax_s.yaxis.set_visible(False)
# uncomment these if you want to make all ticks and their labels visible
#ax_s.set_xticks(ticks_s)
#ax_s.set_xticklabels(ticks_s)
ax_s.set_xlabel('Time [s]', loc='right')

# show active and idle time slots in minute-resolution
ax_m = fig.add_subplot(2, 1, 2)
ax_m.set_ylim([0, 2])
ticks_m = list(range(len(active_m)))
ax_m.bar(ticks_m, active_m, width, color='green')
ax_m.bar(ticks_m, idle_m, width, color='blue')
ax_m.yaxis.set_visible(False)
ax_m.set_xticks(ticks_m)
ax_m.set_xticklabels(ticks_m)
ax_m.set_xlabel('Time [m]', loc='right')

fig.tight_layout()
plt.show()
In this way, the time scale is not realistic, esp. in minute-resolution, and you have tick-numbers instead of time values:
Answered By - Ratislaus
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.