Issue
I've created a Python script that generates a line chart (with dot markers) using Matplotlib and converts it into an interactive HTML graphic using mpld3. The chart displays average hashing times for different algorithms and string lengths. However, I'd like to enhance the user experience by dynamically adjusting the dot size as I zoom in on the graphic in the HTML document.
Here's part of the script:
import hashlib
import random
import string
import time
import matplotlib.pyplot as plt
import multiprocessing
import os
from datetime import datetime
import mpld3
# --- Benchmark configuration ---------------------------------------------

# Seed the module-level RNG up front so the generated test strings are
# reproducible from one execution to the next.
random_seed = 42
random.seed(random_seed)

# Hash algorithm names, in the form accepted by hashlib.new().
algorithms = [
    'sha1',
    'sha256',
    'sha3_256',
]

# Lengths (in characters) of the random strings that get hashed.
string_lengths = [
    10,
    25,
    50,
    75,
    100,
]

# Number of random strings generated for each string length.
num_samples = 50000

# Plot colours, indexed in the same order as `algorithms`.
custom_colors = [
    '#1f77b4',
    '#FFD700',
    '#2ca02c',
]
def generate_random_string(length):
    """Return a random alphanumeric string of ``length`` characters.

    Characters are drawn from ASCII letters and digits using the
    module-level ``random`` generator, so the output is reproducible
    whenever that generator has been seeded.

    Args:
        length: Number of characters to generate. Zero (or a negative
            value) yields an empty string.

    Returns:
        The generated string.
    """
    characters = string.ascii_letters + string.digits
    # One random.choice() call per character keeps the generated sequence
    # identical to earlier runs that used the same seed.
    return ''.join(random.choice(characters) for _ in range(length))
def hash_string(input_string, algorithm):
    """Return the hexadecimal digest of ``input_string`` under ``algorithm``.

    Args:
        input_string: Text to hash; it is encoded as UTF-8 before hashing.
        algorithm: Algorithm name understood by ``hashlib.new`` (for
            example ``'sha1'``, ``'sha256'`` or ``'sha3_256'``).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``hashlib`` does not recognise ``algorithm``.
    """
    # Spell out the encoding rather than relying on str.encode()'s default.
    hasher = hashlib.new(algorithm, input_string.encode("utf-8"))
    return hasher.hexdigest()
def hash_and_measure_avg_time(args):
    """Hash a batch of strings and return the mean time per hash in ns.

    The inputs arrive as a single tuple so the function can be handed
    directly to ``multiprocessing.Pool.map``.

    Args:
        args: ``(algorithm, random_strings)`` pair: the algorithm name
            forwarded to ``hash_string`` and the strings to hash.

    Returns:
        Average elapsed nanoseconds per string as a float, or ``0.0`` when
        ``random_strings`` is empty.
    """
    algorithm, random_strings = args
    sample_count = len(random_strings)
    if not sample_count:
        # Nothing to time; also avoids a ZeroDivisionError below.
        return 0.0

    # perf_counter_ns() is a monotonic, high-resolution clock, so the
    # measurement cannot be skewed by system clock adjustments the way the
    # wall-clock time_ns() can.
    start_time = time.perf_counter_ns()
    for sample in random_strings:
        hash_string(sample, algorithm)
    elapsed = time.perf_counter_ns() - start_time
    return elapsed / sample_count
def main(num_runs=10):
    """Run the hashing benchmark and save one interactive chart per run.

    Every run generates ``num_samples`` random strings for each entry of
    ``string_lengths``, times each algorithm in ``algorithms`` over them
    with a process pool, plots the averages with Matplotlib and writes the
    figure as interactive HTML (via mpld3) into a ``results_<timestamp>``
    folder.

    NOTE(review): the indentation of this script was lost where it was
    published. This reconstruction assumes the complete benchmark sat
    inside the original ``for i in range(10)`` loop and that the one-second
    pause followed each algorithm's pool -- confirm against the author's
    copy.

    Args:
        num_runs: How many times to repeat the complete benchmark. Defaults
            to the previously hard-coded 10.
    """
    for _run in range(num_runs):
        num_cpus = multiprocessing.cpu_count()
        cpu_count = os.cpu_count()
        print(f"\nUsing {num_cpus} CPU cores for analysis")
        print(f"Number of CPU cores available: {cpu_count}")

        # Pre-generate the inputs so string creation is not part of the
        # timed section.
        random_strings = {}
        for length in string_lengths:
            random_strings[length] = [
                generate_random_string(length) for _ in range(num_samples)
            ]

        timestamp = datetime.now().strftime("%m-%d-%Y-%H-%M-%S")
        results_folder = f"results_{timestamp}"
        os.makedirs(results_folder, exist_ok=True)

        results = {}
        for algorithm in algorithms:
            print(f"\nTesting hashing algorithm: {algorithm}")
            with multiprocessing.Pool(processes=num_cpus) as pool:
                args = [
                    (algorithm, random_strings[length])
                    for length in string_lengths
                ]
                results_list = pool.map(hash_and_measure_avg_time, args)
            results[algorithm] = dict(zip(string_lengths, results_list))
            time.sleep(1)

        # Keep a handle on the figure so it can be closed afterwards;
        # otherwise every run leaves another open figure in memory.
        fig = plt.figure(figsize=(10, 6))
        try:
            # A separate index name avoids clobbering the run counter, which
            # the original did by reusing `i` here.
            for color_index, (algorithm, timings) in enumerate(results.items()):
                avg_times = [timings[length] for length in string_lengths]
                plt.plot(
                    string_lengths,
                    avg_times,
                    marker='o',
                    markersize=4,
                    label=algorithm,
                    color=custom_colors[color_index],
                )
            plt.title('Average Hashing Time vs. String Length')
            plt.xlabel('String Length')
            plt.ylabel('Average Hashing Time (ns)')
            plt.legend()
            plt.grid(True)
            interactive_plot = mpld3.fig_to_html(fig)
        finally:
            plt.close(fig)

        plot_filename = f"c_result_{timestamp}.html"
        plot_path = os.path.join(results_folder, plot_filename)
        # Explicit encoding so the HTML is written the same way on every
        # platform instead of using the locale default.
        with open(plot_path, 'w', encoding='utf-8') as html_file:
            html_file.write(interactive_plot)
# Run the benchmark only when this file is executed as a script, not when
# it is imported (multiprocessing workers may import the main module).
if __name__ == "__main__":
    print("\n----- Welcome to the Hashing Performance Benchmark -----")
    main()
As it currently stands, the chart displays dots representing the data points, but they remain the same size when zooming in or out in the HTML graphic. I want to make the dot size change dynamically based on the zoom level so that when I zoom in, the dots appear smaller, and when I zoom out, they appear larger.
It reaches a point where, if I zoom in enough, the marker stops reducing its size and stays 'gigantic.' Is there a way to change this and make it continue dynamically decreasing until the maximum zoom level?
Without Zoom
With Zoom
Max Zoom Level
Solution
Hey, Olla. As far as I know, the markers do not dynamically change their size when zooming in or out in the HTML graphic because of limitations of the mpld3 library. I don't know whether it would fit your project, but you could use the Plotly library instead of matplotlib and mpld3. That way, you could get marker sizes that change dynamically with the zoom level and keep decreasing even at high zoom levels, along with many other features.
import hashlib
import random
import string
import time
import multiprocessing
import os
from datetime import datetime
import plotly.graph_objects as go
# --- Benchmark configuration ---------------------------------------------

# Seed the module-level RNG up front so the generated test strings are
# reproducible from one execution to the next.
random_seed = 42
random.seed(random_seed)

# Hash algorithm names, in the form accepted by hashlib.new().
algorithms = [
    'sha1',
    'sha256',
    'sha3_256',
]

# Lengths (in characters) of the random strings that get hashed.
string_lengths = [
    1,
    10,
    25,
    50,
    75,
]

# Number of random strings generated for each string length.
num_samples = 50000

# Plot colours, indexed in the same order as `algorithms`.
custom_colors = [
    '#1f77b4',
    '#FFD700',
    '#2ca02c',
]
def generate_random_string(length):
    """Return a random alphanumeric string of ``length`` characters.

    Characters are drawn from ASCII letters and digits using the
    module-level ``random`` generator, so the output is reproducible
    whenever that generator has been seeded.

    Args:
        length: Number of characters to generate. Zero (or a negative
            value) yields an empty string.

    Returns:
        The generated string.
    """
    characters = string.ascii_letters + string.digits
    # One random.choice() call per character keeps the generated sequence
    # identical to earlier runs that used the same seed.
    return ''.join(random.choice(characters) for _ in range(length))
def hash_string(input_string, algorithm):
    """Return the hexadecimal digest of ``input_string`` under ``algorithm``.

    Args:
        input_string: Text to hash; it is encoded as UTF-8 before hashing.
        algorithm: Algorithm name understood by ``hashlib.new`` (for
            example ``'sha1'``, ``'sha256'`` or ``'sha3_256'``).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``hashlib`` does not recognise ``algorithm``.
    """
    # Spell out the encoding rather than relying on str.encode()'s default.
    hasher = hashlib.new(algorithm, input_string.encode("utf-8"))
    return hasher.hexdigest()
def hash_and_measure_avg_time(args):
    """Hash a batch of strings and return the mean time per hash in ns.

    The inputs arrive as a single tuple so the function can be handed
    directly to ``multiprocessing.Pool.map``.

    Args:
        args: ``(algorithm, random_strings)`` pair: the algorithm name
            forwarded to ``hash_string`` and the strings to hash.

    Returns:
        Average elapsed nanoseconds per string as a float, or ``0.0`` when
        ``random_strings`` is empty.
    """
    algorithm, random_strings = args
    sample_count = len(random_strings)
    if not sample_count:
        # Nothing to time; also avoids a ZeroDivisionError below.
        return 0.0

    # perf_counter_ns() is a monotonic, high-resolution clock, so the
    # measurement cannot be skewed by system clock adjustments the way the
    # wall-clock time_ns() can.
    start_time = time.perf_counter_ns()
    for sample in random_strings:
        hash_string(sample, algorithm)
    elapsed = time.perf_counter_ns() - start_time
    return elapsed / sample_count
def main(num_runs=10):
    """Run the hashing benchmark and save one interactive Plotly chart per run.

    Every run generates ``num_samples`` random strings for each entry of
    ``string_lengths``, times each algorithm in ``algorithms`` over them
    with a process pool, and writes a Plotly figure (marker trace plus a
    dotted line trace per algorithm, logarithmic x-axis) as HTML into a
    ``results_<timestamp>`` folder.

    NOTE(review): the indentation of this script was lost where it was
    published. This reconstruction assumes the complete benchmark sat
    inside the original ``for i in range(10)`` loop and that the one-second
    pause followed each algorithm's pool -- confirm against the author's
    copy.

    Args:
        num_runs: How many times to repeat the complete benchmark. Defaults
            to the previously hard-coded 10.
    """
    for _run in range(num_runs):
        num_cpus = multiprocessing.cpu_count()
        cpu_count = os.cpu_count()
        print(f"\nUsing {num_cpus} CPU cores for analysis")
        print(f"Number of CPU cores available: {cpu_count}")

        # Pre-generate the inputs so string creation is not part of the
        # timed section.
        random_strings = {}
        for length in string_lengths:
            random_strings[length] = [
                generate_random_string(length) for _ in range(num_samples)
            ]

        timestamp = datetime.now().strftime("%m-%d-%Y-%H-%M-%S")
        results_folder = f"results_{timestamp}"
        os.makedirs(results_folder, exist_ok=True)

        results = {}
        for algorithm in algorithms:
            print(f"\nTesting hashing algorithm: {algorithm}")
            with multiprocessing.Pool(processes=num_cpus) as pool:
                args = [
                    (algorithm, random_strings[length])
                    for length in string_lengths
                ]
                results_list = pool.map(hash_and_measure_avg_time, args)
            results[algorithm] = dict(zip(string_lengths, results_list))
            time.sleep(1)

        # Create a Plotly scatter plot
        fig = go.Figure()
        for color_index, algorithm in enumerate(algorithms):
            avg_times = [results[algorithm][length] for length in string_lengths]
            fig.add_trace(go.Scatter(
                x=string_lengths,
                y=avg_times,
                mode='markers',
                name=algorithm,
                marker=dict(size=4, color=custom_colors[color_index]),
            ))

        # Add lines between the dots. Each algorithm gets exactly one line
        # trace; the original wrapped this in an extra loop over the string
        # lengths, which added every identical trace several times and
        # duplicated the legend entries.
        for color_index, algorithm in enumerate(algorithms):
            avg_times = [results[algorithm][length] for length in string_lengths]
            fig.add_trace(go.Scatter(
                x=string_lengths,
                y=avg_times,
                mode='lines',
                name=f'{algorithm} Lines',
                line=dict(color=custom_colors[color_index], dash='dot'),
            ))

        # Customize the layout
        fig.update_layout(
            title='Average Hashing Time vs. String Length',
            xaxis_title='String Length',
            yaxis_title='Average Hashing Time (ns)',
            legend_title='Algorithm',
        )
        fig.update_xaxes(type='log')

        # Save the interactive plot as an HTML file
        html_file = os.path.join(results_folder, f"c_result_{timestamp}.html")
        fig.write_html(html_file)
# Run the benchmark only when this file is executed as a script, not when
# it is imported (multiprocessing workers may import the main module).
if __name__ == "__main__":
    print("\n----- Welcome to the Hashing Performance Benchmark -----")
    main()
Answered By - wingspedia
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.