6 changes: 6 additions & 0 deletions lib/argument_parser.rb
@@ -24,6 +24,7 @@ class ArgumentParser
:skip_zjit,
:with_pre_init,
:pvalue,
:interleave,
keyword_init: true
)

@@ -149,6 +150,10 @@ def parse(argv)
args.pvalue = true
end

opts.on("--interleave", "run benchmarks interleaved across executables to reduce thermal drift") do
args.interleave = true
end

opts.on("--graph", "generate a graph image of benchmark results") do
args.graph = true
end
@@ -230,6 +235,7 @@ def default_args
excludes: [],
rss: false,
pvalue: false,
interleave: false,
graph: false,
no_pinning: false,
force_pinning: false,
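For reference, a minimal sketch of how an OptionParser boolean switch like this behaves in isolation; the Struct and the parse input here are made-up stand-ins for illustration, not code from this PR:

require 'optparse'

args = Struct.new(:interleave, keyword_init: true).new(interleave: false)

OptionParser.new do |opts|
  # Same shape as the opts.on("--interleave", ...) handler added above.
  opts.on("--interleave", "run benchmarks interleaved across executables") do
    args.interleave = true
  end
end.parse(["--interleave"])

puts args.interleave # => true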
42 changes: 33 additions & 9 deletions lib/benchmark_runner/cli.rb
@@ -40,19 +40,43 @@ def run
force_pinning: args.force_pinning
)

# Benchmark with and without YJIT
# Collect ruby version descriptions for all executables upfront
args.executables.each do |name, executable|
ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp
end

bench_start_time = Time.now.to_f
bench_data = {}
bench_failures = {}
args.executables.each do |name, executable|
ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp

bench_data[name], failures = suite.run(
ruby: executable,
ruby_description: ruby_descriptions[name]
)
# Make it easier to query later.
bench_failures[name] = failures unless failures.empty?
if args.interleave
args.executables.each_key { |name| bench_data[name] = {} }
entries = suite.benchmarks

entries.each_with_index do |entry, idx|
# Alternate executable order to cancel cache-warming bias
exes = ruby_descriptions.keys
exes = exes.reverse if idx.odd?

exes.each do |name|
puts("Running benchmark \"#{entry.name}\" [#{name}] (#{idx+1}/#{entries.length})")
result = suite.run_benchmark(entry, ruby: args.executables[name], ruby_description: ruby_descriptions[name])
if result[:data]
bench_data[name][entry.name] = result[:data]
else
bench_failures[name] ||= {}
bench_failures[name][entry.name] = result[:failure]
end
end
end
else
args.executables.each do |name, executable|
bench_data[name], failures = suite.run(
ruby: executable,
ruby_description: ruby_descriptions[name]
)
bench_failures[name] = failures unless failures.empty?
end
end

bench_end_time = Time.now.to_f
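The interleaved path above uses an ABBA-style ordering: the executable order reverses on every other benchmark, so slow drift (thermal throttling, gradually warming caches) hits each executable roughly equally instead of always favoring whichever runs second. A standalone sketch of just that ordering, with made-up executable and benchmark names:

exes = ["ruby-a", "ruby-b"]
benchmarks = ["fib", "respond_to", "erubi"]

benchmarks.each_with_index do |bench, idx|
  # Reverse the order on odd-indexed benchmarks, as the CLI does.
  order = idx.odd? ? exes.reverse : exes
  order.each { |exe| puts "#{bench} -> #{exe}" }
end
# Prints: fib on ruby-a then ruby-b, respond_to on ruby-b then ruby-a, ...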
67 changes: 42 additions & 25 deletions lib/benchmark_suite.rb
@@ -33,41 +33,53 @@ def initialize(categories:, name_filters:, excludes: [], out_path:, harness:, ha
@bench_dir = BENCHMARKS_DIR
end

# Run all the benchmarks and record execution times
# Returns [bench_data, bench_failures]
def run(ruby:, ruby_description:)
bench_data = {}
bench_failures = {}
# Discovered and filtered benchmark entries, memoized.
def benchmarks
@benchmarks ||= discover_benchmarks
end

benchmark_entries = discover_benchmarks
# Run a single benchmark entry on a single executable.
# Returns { name:, data: } on success, { name:, failure: } on error.
def run_benchmark(entry, ruby:, ruby_description:)
env = benchmark_env(ruby)
caller_json_path = ENV["RESULT_JSON_PATH"]

# Capture quiet setting before entering unbundled env (which clears ENV)
quiet = ENV['BENCHMARK_QUIET'] == '1'

benchmark_entries.each_with_index do |entry, idx|
puts("Running benchmark \"#{entry.name}\" (#{idx+1}/#{benchmark_entries.length})")
result_json_path = caller_json_path || File.join(out_path, "temp#{Process.pid}.json")
cmd_prefix = base_cmd(ruby_description, entry.name)

result_json_path = caller_json_path || File.join(out_path, "temp#{Process.pid}.json")
cmd_prefix = base_cmd(ruby_description, entry.name)

# Clear project-level Bundler environment so benchmarks run in a clean context.
# Benchmarks that need Bundler (e.g., railsbench) set up their own via use_gemfile.
# This is important when running tests under `bundle exec rake test`.
result = if defined?(Bundler)
Bundler.with_unbundled_env do
run_single_benchmark(entry.script_path, result_json_path, ruby, cmd_prefix, env, entry.name, quiet: quiet)
end
else
# Clear project-level Bundler environment so benchmarks run in a clean context.
# Benchmarks that need Bundler (e.g., railsbench) set up their own via use_gemfile.
result = if defined?(Bundler)
Bundler.with_unbundled_env do
run_single_benchmark(entry.script_path, result_json_path, ruby, cmd_prefix, env, entry.name, quiet: quiet)
end
else
run_single_benchmark(entry.script_path, result_json_path, ruby, cmd_prefix, env, entry.name, quiet: quiet)
end

if result[:success]
{ name: entry.name, data: process_benchmark_result(result_json_path, result[:command], delete_file: !caller_json_path) }
else
FileUtils.rm_f(result_json_path) unless caller_json_path
{ name: entry.name, failure: result[:status].exitstatus }
end
end

if result[:success]
bench_data[entry.name] = process_benchmark_result(result_json_path, result[:command], delete_file: !caller_json_path)
# Run all the benchmarks and record execution times.
# Returns [bench_data, bench_failures]
def run(ruby:, ruby_description:)
bench_data = {}
bench_failures = {}

benchmarks.each_with_index do |entry, idx|
puts("Running benchmark \"#{entry.name}\" (#{idx+1}/#{benchmarks.length})")

result = run_benchmark(entry, ruby: ruby, ruby_description: ruby_description)
if result[:data]
bench_data[entry.name] = result[:data]
else
bench_failures[entry.name] = result[:status].exitstatus
FileUtils.rm_f(result_json_path) unless caller_json_path
bench_failures[entry.name] = result[:failure]
end
end

@@ -174,6 +186,11 @@ def benchmark_harness_for(benchmark_name)
end

def benchmark_env(ruby)
@benchmark_env_cache ||= {}
@benchmark_env_cache[ruby] ||= compute_benchmark_env(ruby)
end

def compute_benchmark_env(ruby)
# When the Ruby running this script is not the first Ruby in PATH, shell commands
# like `bundle install` in a child process will not use the Ruby being benchmarked.
# It overrides PATH to guarantee the commands of the benchmarked Ruby will be used.
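The benchmark_env change above introduces per-executable memoization: the environment is computed once per Ruby and reused, which matters in interleaved mode where executables alternate on every benchmark instead of each running in one long batch. A self-contained sketch of the same pattern, with compute_benchmark_env stubbed out (the real method builds a PATH override):

class EnvCacheSketch
  def benchmark_env(ruby)
    @benchmark_env_cache ||= {}
    @benchmark_env_cache[ruby] ||= compute_benchmark_env(ruby)
  end

  private

  # Stub standing in for the real PATH-override construction.
  def compute_benchmark_env(ruby)
    puts "computing env for #{ruby.inspect}"
    { "EXAMPLE_PATH" => ruby.first }
  end
end

cache = EnvCacheSketch.new
cache.benchmark_env(["ruby-a"]) # computes
cache.benchmark_env(["ruby-a"]) # cache hit; nothing printed
cache.benchmark_env(["ruby-b"]) # computes once for the second executable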
10 changes: 10 additions & 0 deletions test/argument_parser_test.rb
@@ -50,6 +50,7 @@ def setup_mock_ruby(path)
assert_equal [], args.name_filters
assert_equal false, args.rss
assert_equal false, args.pvalue
assert_equal false, args.interleave
assert_equal false, args.graph
assert_equal false, args.no_pinning
assert_equal false, args.turbo
@@ -438,6 +439,15 @@ def setup_mock_ruby(path)
end
end

describe '--interleave option' do
it 'sets interleave flag' do
parser = ArgumentParser.new
args = parser.parse(['--interleave'])

assert_equal true, args.interleave
end
end

describe '--graph option' do
it 'sets graph flag' do
parser = ArgumentParser.new
32 changes: 32 additions & 0 deletions test/benchmark_runner_cli_test.rb
@@ -48,6 +48,7 @@ def create_args(overrides = {})
name_filters: [],
excludes: [],
rss: false,
interleave: false,
graph: false,
no_pinning: true,
turbo: true,
@@ -318,6 +319,37 @@ def create_args(overrides = {})
end
end

it 'runs benchmarks interleaved when --interleave is set' do
Dir.mktmpdir do |tmpdir|
args = create_args(
name_filters: ['fib', 'respond_to'],
out_path: tmpdir,
interleave: true
)

cli = BenchmarkRunner::CLI.new(args)
output = capture_io { cli.run }.join

# Progress output should include executable names in brackets
assert_match(/\[.+\]/, output, "Interleaved output should include executable name in brackets")
assert_match(/Total time spent benchmarking:/, output)

# Verify output files were created with data from all executables
json_files = Dir.glob(File.join(tmpdir, "*.json"))
assert_equal 1, json_files.size

json_data = JSON.parse(File.read(json_files.first))
raw_data = json_data['raw_data']

# All executables should have results
args.executables.each_key do |name|
assert raw_data.key?(name), "Expected raw_data to contain '#{name}'"
assert raw_data[name].key?('fib'), "Expected '#{name}' to have 'fib' results"
assert raw_data[name].key?('respond_to'), "Expected '#{name}' to have 'respond_to' results"
end
end
end

it 'creates output directory if it does not exist' do
Dir.mktmpdir do |parent_tmpdir|
nested_dir = File.join(parent_tmpdir, 'nested', 'output', 'dir')
99 changes: 99 additions & 0 deletions test/benchmark_suite_test.rb
@@ -519,6 +519,105 @@
end
end

describe '#benchmarks' do
it 'returns discovered benchmark entries' do
suite = BenchmarkSuite.new(
categories: [],
name_filters: ['simple'],
out_path: @out_path,
harness: 'harness',
no_pinning: true
)

entries = suite.benchmarks
assert_instance_of Array, entries
assert_equal 1, entries.length
assert_equal 'simple', entries.first.name
end

it 'memoizes the result' do
suite = BenchmarkSuite.new(
categories: [],
name_filters: ['simple'],
out_path: @out_path,
harness: 'harness',
no_pinning: true
)

assert_same suite.benchmarks, suite.benchmarks
end
end

describe '#run_benchmark' do
it 'returns data hash on success' do
suite = BenchmarkSuite.new(
categories: [],
name_filters: ['simple'],
out_path: @out_path,
harness: 'harness',
no_pinning: true
)

entry = suite.benchmarks.first
result = nil
capture_io do
result = suite.run_benchmark(entry, ruby: [RbConfig.ruby], ruby_description: 'ruby 3.2.0')
end

assert_equal 'simple', result[:name]
assert_instance_of Hash, result[:data]
assert_includes result[:data], 'warmup'
assert_includes result[:data], 'bench'
assert_includes result[:data], 'rss'
assert_nil result[:failure]
end

it 'returns failure hash on error' do
File.write('benchmarks/failing.rb', "exit(1)\n")

suite = BenchmarkSuite.new(
categories: [],
name_filters: ['failing'],
out_path: @out_path,
harness: 'harness',
no_pinning: true
)

entry = suite.benchmarks.first
result = nil
capture_io do
result = suite.run_benchmark(entry, ruby: [RbConfig.ruby], ruby_description: 'ruby 3.2.0')
end

assert_equal 'failing', result[:name]
assert_nil result[:data]
assert_equal 1, result[:failure]
end

it 'produces same data as run for the same benchmark' do
suite = BenchmarkSuite.new(
categories: [],
name_filters: ['simple'],
out_path: @out_path,
harness: 'harness',
no_pinning: true
)

entry = suite.benchmarks.first
single_result = nil
capture_io do
single_result = suite.run_benchmark(entry, ruby: [RbConfig.ruby], ruby_description: 'ruby 3.2.0')
end

run_data = nil
capture_io do
run_data, _ = suite.run(ruby: [RbConfig.ruby], ruby_description: 'ruby 3.2.0')
end

assert_equal run_data['simple'].keys.sort, single_result[:data].keys.sort
end
end

describe 'integration with BenchmarkFilter' do
it 'uses BenchmarkFilter to match benchmarks' do
# Create benchmarks with different categories