From 90cb822d29e6671b886975ee1d960ed03f720bf7 Mon Sep 17 00:00:00 2001 From: Alex Korzun Date: Wed, 19 Feb 2025 23:12:30 +0400 Subject: [PATCH 1/2] fix: Step 1 --- .gitignore | 5 ++ Gemfile | 16 ++++ Gemfile.lock | 54 ++++++++++++++ app/models/bus.rb | 8 ++ app/models/city.rb | 7 ++ app/models/service.rb | 7 ++ app/models/trip.rb | 12 +++ app/services/reloader.rb | 130 +++++++++++++++++++++++++++++++++ case-study-template.md | 59 +++++++++++++++ db/schema.rb | 16 ++-- lib/benchmark/bench_wrapper.rb | 29 ++++++++ lib/profilers/profile.rb | 19 +++++ lib/tasks/annotate_rb.rake | 8 ++ lib/tasks/profile.rake | 30 ++++++++ lib/tasks/utils.rake | 35 +-------- spec/services/reloader_spec.rb | 35 +++++++++ 16 files changed, 430 insertions(+), 40 deletions(-) create mode 100644 app/services/reloader.rb create mode 100644 case-study-template.md create mode 100644 lib/benchmark/bench_wrapper.rb create mode 100644 lib/profilers/profile.rb create mode 100644 lib/tasks/annotate_rb.rake create mode 100644 lib/tasks/profile.rake create mode 100644 spec/services/reloader_spec.rb diff --git a/.gitignore b/.gitignore index 59c74047..9cce5565 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,8 @@ /tmp /log /public +/lib/profilers/reports +.rspec +spec/spec_helper.rb +spec/rails_helper.rb +/fixtures \ No newline at end of file diff --git a/Gemfile b/Gemfile index 34074dfd..bdd449fa 100644 --- a/Gemfile +++ b/Gemfile @@ -9,6 +9,22 @@ gem 'puma' gem 'listen' gem 'bootsnap' gem 'rack-mini-profiler' +gem 'pry' +gem 'oj' +gem 'activerecord-import' + +group :development, :test do + # RSpec + gem 'rspec-rails' + gem 'rspec-benchmark' + + # Benchmarking + gem 'ruby-prof' + gem 'stackprof' + gem 'memory_profiler' + + gem 'annotaterb' +end # Windows does not include zoneinfo files, so bundle the tzinfo-data gem gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] diff --git a/Gemfile.lock b/Gemfile.lock index a9ddd818..0a9a4f0d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -53,6 +53,8 @@ GEM activemodel (= 8.0.1) activesupport (= 8.0.1) timeout (>= 0.4.0) + activerecord-import (2.1.0) + activerecord (>= 4.2) activestorage (8.0.1) actionpack (= 8.0.1) activejob (= 8.0.1) @@ -72,16 +74,22 @@ GEM securerandom (>= 0.3) tzinfo (~> 2.0, >= 2.0.5) uri (>= 0.13.1) + annotaterb (4.14.0) base64 (0.2.0) benchmark (0.4.0) + benchmark-malloc (0.2.0) + benchmark-perf (0.6.0) + benchmark-trend (0.4.0) bigdecimal (3.1.9) bootsnap (1.18.4) msgpack (~> 1.2) builder (3.3.0) + coderay (1.1.3) concurrent-ruby (1.3.5) connection_pool (2.5.0) crass (1.0.6) date (3.4.1) + diff-lcs (1.6.0) drb (2.2.1) erubi (1.13.1) ffi (1.17.1-arm64-darwin) @@ -107,6 +115,8 @@ GEM net-pop net-smtp marcel (1.0.4) + memory_profiler (1.1.0) + method_source (1.1.0) mini_mime (1.1.5) minitest (5.25.4) msgpack (1.8.0) @@ -122,10 +132,17 @@ GEM nio4r (2.7.4) nokogiri (1.18.2-arm64-darwin) racc (~> 1.4) + oj (3.16.9) + bigdecimal (>= 3.0) + ostruct (>= 0.2) + ostruct (0.6.1) pg (1.5.9) pp (0.6.2) prettyprint prettyprint (0.2.0) + pry (0.15.2) + coderay (~> 1.1) + method_source (~> 1.0) psych (5.2.3) date stringio @@ -179,7 +196,35 @@ GEM psych (>= 4.0.0) reline (0.6.0) io-console (~> 0.5) + rspec (3.13.0) + rspec-core (~> 3.13.0) + rspec-expectations (~> 3.13.0) + rspec-mocks (~> 3.13.0) + rspec-benchmark (0.6.0) + benchmark-malloc (~> 0.2) + benchmark-perf (~> 0.6) + benchmark-trend (~> 0.4) + rspec (>= 3.0) + rspec-core (3.13.3) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.3) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.2) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-rails (7.1.1) + actionpack (>= 7.0) + activesupport (>= 7.0) + railties (>= 7.0) + rspec-core (~> 3.13) + rspec-expectations (~> 3.13) + rspec-mocks (~> 3.13) + rspec-support (~> 3.13) + rspec-support (3.13.2) + ruby-prof (1.7.1) securerandom (0.4.1) + stackprof (0.2.27) stringio (3.1.2) thor (1.3.2) timeout (0.4.3) @@ -197,12 +242,21 @@ PLATFORMS arm64-darwin-24 DEPENDENCIES + activerecord-import + annotaterb bootsnap listen + memory_profiler + oj pg + pry puma rack-mini-profiler rails (~> 8.0.1) + rspec-benchmark + rspec-rails + ruby-prof + stackprof tzinfo-data RUBY VERSION diff --git a/app/models/bus.rb b/app/models/bus.rb index 1dcc54cb..0363540b 100644 --- a/app/models/bus.rb +++ b/app/models/bus.rb @@ -1,3 +1,11 @@ +# == Schema Information +# +# Table name: buses +# +# id :bigint not null, primary key +# model :string +# number :string +# class Bus < ApplicationRecord MODELS = [ 'Икарус', diff --git a/app/models/city.rb b/app/models/city.rb index 19ec7f36..957290d8 100644 --- a/app/models/city.rb +++ b/app/models/city.rb @@ -1,3 +1,10 @@ +# == Schema Information +# +# Table name: cities +# +# id :bigint not null, primary key +# name :string +# class City < ApplicationRecord validates :name, presence: true, uniqueness: true validate :name_has_no_spaces diff --git a/app/models/service.rb b/app/models/service.rb index 9cbb2a32..b2cdd9a2 100644 --- a/app/models/service.rb +++ b/app/models/service.rb @@ -1,3 +1,10 @@ +# == Schema Information +# +# Table name: services +# +# id :bigint not null, primary key +# name :string +# class Service < ApplicationRecord SERVICES = [ 'WiFi', diff --git a/app/models/trip.rb b/app/models/trip.rb index 9d63dfff..88521fef 100644 --- a/app/models/trip.rb +++ b/app/models/trip.rb @@ -1,3 +1,15 @@ +# == Schema Information +# +# Table name: trips +# +# id :bigint not null, primary key +# duration_minutes :integer +# price_cents :integer +# start_time :string +# bus_id :integer +# from_id :integer +# to_id :integer +# class Trip < ApplicationRecord HHMM_REGEXP = /([0-1][0-9]|[2][0-3]):[0-5][0-9]/ diff --git a/app/services/reloader.rb b/app/services/reloader.rb new file mode 100644 index 00000000..c1c3d937 --- /dev/null +++ b/app/services/reloader.rb @@ -0,0 +1,130 @@ +# frozen_string_literal: true + +class Reloader + def self.reload(file_name) + new(file_name).reload + end + + def initialize(file_name) + @file_name = file_name + end + + def reload + clear_db + load_json + end + + private + + def clear_db + ActiveRecord::Base.connection + .execute("TRUNCATE cities, buses, services, trips, buses_services RESTART IDENTITY CASCADE;") + end + + def load_json + @cities = {} + @buses = {} + @services = {} + @buses_services = Set.new + + trips_command = "COPY trips (from_id, to_id, start_time, duration_minutes, price_cents, bus_id) FROM STDIN WITH CSV DELIMITER ';'" + + connection.copy_data(trips_command) do + File.open(@file_name, 'r') do |file| + nesting = 0 + buffer = +"" + + file.each_char do |ch| + case ch + when '{' + nesting += 1 + buffer << ch + when '}' + nesting -= 1 + buffer << ch + + if nesting == 0 + trip = Oj.load(buffer) + import_trip(trip) + + buffer = +"" + end + else + buffer << ch if nesting >= 1 + end + end + end + end + + flush_cities + flush_buses + flush_services + flush_buses_services + end + + def import_trip(trip) + from_id = find_or_create_city(trip['from']) + to_id = find_or_create_city(trip['to']) + bus_id = find_or_create_bus(trip['bus']) + service_ids = trip['bus']['services'].map { |service| find_or_create_service(service) } + + connection.put_copy_data("#{from_id};#{to_id};#{trip['start_time']};#{trip['duration_minutes']};#{trip['price_cents']};#{bus_id}\n") + + service_ids.each do |service_id| + @buses_services << [bus_id, service_id] + end + end + + def find_or_create_city(name) + @cities[name] ||= @cities.size + 1 + end + + def find_or_create_bus(bus) + bus_number = bus['number'] + model = bus['model'] + key = [model, bus_number] + + @buses[key] ||= @buses.size + 1 + end + + def find_or_create_service(name) + @services[name] ||= @services.size + 1 + end + + def flush_cities + return if @cities.empty? + + values = @cities.map { |name, id| "(#{id}, '#{name}')" }.join(', ') + ActiveRecord::Base.connection.execute("INSERT INTO cities (id, name) VALUES #{values}") + end + + def flush_buses + return if @buses.empty? + + values = @buses.map do |(model, number), id| + "(#{id}, '#{number}', '#{model}')" + end.join(', ') + ActiveRecord::Base.connection.execute("INSERT INTO buses (id, number, model) VALUES #{values}") + end + + def flush_services + return if @services.empty? + + values = @services.map { |name, id| "(#{id}, '#{name}')" }.join(', ') + ActiveRecord::Base.connection.execute("INSERT INTO services (id, name) VALUES #{values}") + end + + def flush_buses_services + return if @buses_services.empty? + + values = @buses_services.map do |(bus_id, service_id)| + "(#{bus_id}, #{service_id})" + end.join(', ') + + ActiveRecord::Base.connection.execute("INSERT INTO buses_services (bus_id, service_id) VALUES #{values}") + end + + def connection + @connection ||= ActiveRecord::Base.connection.raw_connection + end +end diff --git a/case-study-template.md b/case-study-template.md new file mode 100644 index 00000000..03fc5a96 --- /dev/null +++ b/case-study-template.md @@ -0,0 +1,59 @@ +# Case-study оптимизации + +## Актуальная проблема +При импорте данных в нашем проекте из json файла в базу возникает проблема. + +Необходимо обработать файл размером более 30Mb. + +Для мпорта данныз в настоящее время у нас есть rake таска, но на большом количестве данных она выполняется очень медленно. + +Я решил исправить эту проблему, оптимизировав rake скрипт импорта данных. + +### А Импорт данных +## Формирование метрики +Для того, чтобы понимать, дают ли мои изменения положительный эффект на быстродействие программы я придумал использовать такую метрику: Импорт файла large.json должен занимать менее 1 минуты. + +## Feedback-Loop +Для того, чтобы иметь возможность быстро проверять гипотезы я выстроил эффективный `feedback-loop`, который позволил мне получать обратную связь по эффективности сделанных изменений. + +Вот как я построил `feedback_loop`: +Добавил в скрипт бенчмарк для измерения продолжительности импорта. + +Вот как я построил `feedback_loop`: +- Запускаю имопрт данных. +- Запускаю профилировщик. +- Определяю главную точку роста. +- Вношу изменения. +- Защищаю изменения тестом. + +Проверил работу исходной rake таски на разных объеиах данных: + +* {"3.4.1":{"gc":"enabled","time":8.45,"gc_count":79,"memory":"58 MB"}} - small.json +* {"3.4.1":{"gc":"enabled","time":61.38,"gc_count":309,"memory":"94 MB"}} - medium.json +* {"3.4.1":{"gc":"enabled","time":572.29,"gc_count":763,"memory":"244 MB"}} - large.json + +Для удобства работы с моделями добавил gem annotate. + +Перед началом оптимизации написал тест который позволит контролировать что после изменений скрипт работает валидно(spec/services/reloader_spec.rb). + +## Поиск точек роста +Проанализировал время выполнения с помощью 'ruby-prof', но не получил больших подробностей кроме того что большую часть времени +занимают подключения и раюота БД. + +Решил проанализировать логи 'development.log'. Заметил огромное количество SELECT запросов для City, Bus, Servise моделей +по каждой операции. + +## Оптимизация +- Для удобства тестирования и работы со скриптом вынес логику в отдельный Reload сервис. +- Переписал скрипт в потоковом стиле, записывая трипы в базу на лету. +- Для очистки таблиц использовал TRUNCATE вместо .delete_all + +## Метрика +* {"3.4.1":{"gc":"enabled","time":0.35,"gc_count":2,"memory":"14 MB"}} - small.json +* {"3.4.1":{"gc":"enabled","time":0.56,"gc_count":24,"memory":"14 MB"}} - medium.json +* {"3.4.1":{"gc":"enabled","time":2.6,"gc_count":243,"memory":"15 MB"}} - large.json +* {"3.4.1":{"gc":"enabled","time":24.16,"gc_count":586,"memory":"32 MB"}} - 1M.json +* {"3.4.1":{"gc":"enabled","time":222.63,"gc_count":5441,"memory":"18 MB"}} - 10M.json + +Как результат, вложился в поставленную метрику в 1 мин для large.json - 2.6 сек. + diff --git a/db/schema.rb b/db/schema.rb index f6921e45..40c9c8ae 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -2,18 +2,17 @@ # of editing this file, please use the migrations feature of Active Record to # incrementally modify your database, and then regenerate this schema definition. # -# Note that this schema.rb definition is the authoritative source for your -# database schema. If you need to create the application database on another -# system, you should be using db:schema:load, not running all the migrations -# from scratch. The latter is a flawed and unsustainable approach (the more migrations -# you'll amass, the slower it'll run and the greater likelihood for issues). +# This file is the source Rails uses to define your schema when running `bin/rails +# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to +# be faster and is potentially less error prone than running all of your +# migrations from scratch. Old migrations may fail to apply correctly if those +# migrations use external dependencies or application code. # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_03_30_193044) do - +ActiveRecord::Schema[8.0].define(version: 2019_03_30_193044) do # These are extensions that must be enabled in order to support this database - enable_extension "plpgsql" + enable_extension "pg_catalog.plpgsql" create_table "buses", force: :cascade do |t| t.string "number" @@ -41,5 +40,4 @@ t.integer "price_cents" t.integer "bus_id" end - end diff --git a/lib/benchmark/bench_wrapper.rb b/lib/benchmark/bench_wrapper.rb new file mode 100644 index 00000000..7dfb4e38 --- /dev/null +++ b/lib/benchmark/bench_wrapper.rb @@ -0,0 +1,29 @@ +require "benchmark" + +def measure(&block) + no_gc = (ARGV[0] == "--no-gc") + + if no_gc + GC.disable + else + GC.start + end + + memory_before = `ps -o rss= -p #{Process.pid}`.to_i/1024 + gc_stat_before = GC.stat + time = Benchmark.realtime do + yield + end + + memory_after = `ps -o rss= -p #{Process.pid}`.to_i/1024 + gc_stat_after = GC.stat + + puts({ + RUBY_VERSION => { + gc: no_gc ? 'disabled' : 'enabled', + time: time.round(2), + gc_count: gc_stat_after[:count] - gc_stat_before[:count], + memory: "%d MB" % (memory_after - memory_before) + } + }.to_json) +end diff --git a/lib/profilers/profile.rb b/lib/profilers/profile.rb new file mode 100644 index 00000000..d491017a --- /dev/null +++ b/lib/profilers/profile.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require 'rake' +require 'ruby-prof' + +REPORTS_DIR = 'lib/profilers/reports' + +profile = RubyProf::Profile.new(measure_mode: RubyProf::WALL_TIME) + +Rake.application.rake_require('tasks/utils') +Rake::Task.define_task(:environment) + +result = profile.profile do + Rake::Task['reload_json'].invoke('fixtures/small.json') +end + +printer = RubyProf::CallTreePrinter.new(result) +printer.print(path: REPORTS_DIR, profile: "callgrind") +puts "Callgrind report generated at #{REPORTS_DIR}/callgrind.out" diff --git a/lib/tasks/annotate_rb.rake b/lib/tasks/annotate_rb.rake new file mode 100644 index 00000000..1ad0ec39 --- /dev/null +++ b/lib/tasks/annotate_rb.rake @@ -0,0 +1,8 @@ +# This rake task was added by annotate_rb gem. + +# Can set `ANNOTATERB_SKIP_ON_DB_TASKS` to be anything to skip this +if Rails.env.development? && ENV["ANNOTATERB_SKIP_ON_DB_TASKS"].nil? + require "annotate_rb" + + AnnotateRb::Core.load_rake_tasks +end diff --git a/lib/tasks/profile.rake b/lib/tasks/profile.rake new file mode 100644 index 00000000..56882069 --- /dev/null +++ b/lib/tasks/profile.rake @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +task :profile do + require 'fileutils' + require 'rake' + require 'ruby-prof' + require 'memory_profiler' + + REPORTS_DIR = 'lib/profilers/reports' + + report = MemoryProfiler.report do + Rake::Task['reload_json'].invoke('fixtures/small.json') + end + + report.pretty_print(scale_bytes: true) + # profile = RubyProf::Profile.new(measure_mode: RubyProf::WALL_TIME) + + # # Rake.application.rake_require('tasks/utils') + # # Rake::Task.define_task(:environment) + + # result = profile.profile do + # Rake::Task['reload_json'].invoke('fixtures/small.json') + # end + + # printer = RubyProf::FlatPrinter.new(result) + # File.open("#{REPORTS_DIR}/flat.txt", 'w+') { |file| printer.print(file) } + # puts "Flat profile report generated at #{REPORTS_DIR}/flat.txt" + + +end diff --git a/lib/tasks/utils.rake b/lib/tasks/utils.rake index 540fe871..7acca175 100644 --- a/lib/tasks/utils.rake +++ b/lib/tasks/utils.rake @@ -1,34 +1,7 @@ -# Наивная загрузка данных из json-файла в БД -# rake reload_json[fixtures/small.json] -task :reload_json, [:file_name] => :environment do |_task, args| - json = JSON.parse(File.read(args.file_name)) - - ActiveRecord::Base.transaction do - City.delete_all - Bus.delete_all - Service.delete_all - Trip.delete_all - ActiveRecord::Base.connection.execute('delete from buses_services;') +require_relative '../benchmark/bench_wrapper' - json.each do |trip| - from = City.find_or_create_by(name: trip['from']) - to = City.find_or_create_by(name: trip['to']) - services = [] - trip['bus']['services'].each do |service| - s = Service.find_or_create_by(name: service) - services << s - end - bus = Bus.find_or_create_by(number: trip['bus']['number']) - bus.update(model: trip['bus']['model'], services: services) - - Trip.create!( - from: from, - to: to, - bus: bus, - start_time: trip['start_time'], - duration_minutes: trip['duration_minutes'], - price_cents: trip['price_cents'], - ) - end +task :reload_json, [:file_name] => :environment do |_task, args| + measure do + Reloader.reload(args.file_name) end end diff --git a/spec/services/reloader_spec.rb b/spec/services/reloader_spec.rb new file mode 100644 index 00000000..8519bbb1 --- /dev/null +++ b/spec/services/reloader_spec.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe Reloader do + describe '.reload' do + subject(:reload_data) { described_class.reload(json_file_path) } + + let(:json_file_path) { file_fixture('example.json') } + + it 'clears existing data' do + City.create!(name: 'DummyCity') + reload_data + + expect(City.find_by(name: 'DummyCity')).to be_nil + end + + it 'loads trips data from JSON and create records in DB' do + reload_data + + expect(City.count).to eq(2) + expect(Bus.count).to eq(1) + expect(Service.count).to eq(2) + expect(Trip.count).to eq(10) + + moscow = City.find_by(name: 'Москва') + expect(moscow).to be_present + + bus = Bus.find_by(number: '123') + expect(bus).to be_present + expect(bus.model).to eq('Икарус') + expect(bus.services.map(&:name).uniq.size).to eq(bus.services.size) + end + end +end From f338ad7a1b117c9a75382a3cecf1d56485c72f6d Mon Sep 17 00:00:00 2001 From: Alex Korzun Date: Sun, 23 Feb 2025 15:03:32 +0400 Subject: [PATCH 2/2] fix: Step 2 --- .gitignore | 3 +- Gemfile | 14 +++- Gemfile.lock | 70 +++++++++++++++---- app/controllers/trips_controller.rb | 8 ++- app/models/city.rb | 4 ++ app/models/trip.rb | 6 +- app/views/trips/_delimiter.html.erb | 1 - app/views/trips/_service.html.erb | 1 - app/views/trips/_services.html.erb | 2 +- app/views/trips/_trip.html.erb | 4 ++ app/views/trips/index.html.erb | 15 ++-- case-study-template.md | 70 +++++++++++++++---- config/environments/development.rb | 9 +++ config/environments/test.rb | 6 ++ config/routes.rb | 2 + ...0250222155254_create_pghero_query_stats.rb | 15 ++++ ...0250222160422_create_pghero_space_stats.rb | 13 ++++ .../20250222163633_remove_unused_indexes.rb | 5 ++ ...50223085937_add_index_to_cities_on_name.rb | 5 ++ .../20250223092208_add_index_from_trips.rb | 5 ++ db/schema.rb | 24 ++++++- spec/controllers/trips_controller.rb | 27 +++++++ spec/system/trips_spec.rb | 34 +++++++++ 23 files changed, 296 insertions(+), 47 deletions(-) delete mode 100644 app/views/trips/_delimiter.html.erb delete mode 100644 app/views/trips/_service.html.erb create mode 100644 db/migrate/20250222155254_create_pghero_query_stats.rb create mode 100644 db/migrate/20250222160422_create_pghero_space_stats.rb create mode 100644 db/migrate/20250222163633_remove_unused_indexes.rb create mode 100644 db/migrate/20250223085937_add_index_to_cities_on_name.rb create mode 100644 db/migrate/20250223092208_add_index_from_trips.rb create mode 100644 spec/controllers/trips_controller.rb create mode 100644 spec/system/trips_spec.rb diff --git a/.gitignore b/.gitignore index 9cce5565..8af401c6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ .rspec spec/spec_helper.rb spec/rails_helper.rb -/fixtures \ No newline at end of file +/fixtures +.env \ No newline at end of file diff --git a/Gemfile b/Gemfile index bdd449fa..356a3de3 100644 --- a/Gemfile +++ b/Gemfile @@ -4,24 +4,36 @@ git_source(:github) { |repo| "https://github.com/#{repo}.git" } ruby file: '.ruby-version' gem 'rails', '~> 8.0.1' +gem "sprockets-rails" gem 'pg' gem 'puma' gem 'listen' gem 'bootsnap' gem 'rack-mini-profiler' + gem 'pry' gem 'oj' gem 'activerecord-import' +# PG +gem 'pghero' +gem "pg_query" + group :development, :test do + # ENV + gem "dotenv-rails", require: "dotenv/load", groups: %i[development test] + # RSpec gem 'rspec-rails' - gem 'rspec-benchmark' + gem 'capybara' + gem 'selenium-webdriver' + gem 'webdrivers' # Benchmarking gem 'ruby-prof' gem 'stackprof' gem 'memory_profiler' + gem 'bullet' gem 'annotaterb' end diff --git a/Gemfile.lock b/Gemfile.lock index 0a9a4f0d..2fb6aa7f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -74,27 +74,45 @@ GEM securerandom (>= 0.3) tzinfo (~> 2.0, >= 2.0.5) uri (>= 0.13.1) + addressable (2.8.7) + public_suffix (>= 2.0.2, < 7.0) annotaterb (4.14.0) base64 (0.2.0) benchmark (0.4.0) - benchmark-malloc (0.2.0) - benchmark-perf (0.6.0) - benchmark-trend (0.4.0) bigdecimal (3.1.9) bootsnap (1.18.4) msgpack (~> 1.2) builder (3.3.0) + bullet (8.0.1) + activesupport (>= 3.0.0) + uniform_notifier (~> 1.11) + capybara (3.40.0) + addressable + matrix + mini_mime (>= 0.1.3) + nokogiri (~> 1.11) + rack (>= 1.6.0) + rack-test (>= 0.6.3) + regexp_parser (>= 1.5, < 3.0) + xpath (~> 3.2) coderay (1.1.3) concurrent-ruby (1.3.5) connection_pool (2.5.0) crass (1.0.6) date (3.4.1) diff-lcs (1.6.0) + dotenv (3.1.7) + dotenv-rails (3.1.7) + dotenv (= 3.1.7) + railties (>= 6.1) drb (2.2.1) erubi (1.13.1) ffi (1.17.1-arm64-darwin) globalid (1.2.1) activesupport (>= 6.1) + google-protobuf (4.29.3-arm64-darwin) + bigdecimal + rake (>= 13) i18n (1.14.7) concurrent-ruby (~> 1.0) io-console (0.8.0) @@ -115,6 +133,7 @@ GEM net-pop net-smtp marcel (1.0.4) + matrix (0.4.2) memory_profiler (1.1.0) method_source (1.1.0) mini_mime (1.1.5) @@ -137,6 +156,10 @@ GEM ostruct (>= 0.2) ostruct (0.6.1) pg (1.5.9) + pg_query (6.0.0) + google-protobuf (>= 3.25.3) + pghero (3.6.1) + activerecord (>= 6.1) pp (0.6.2) prettyprint prettyprint (0.2.0) @@ -146,6 +169,7 @@ GEM psych (5.2.3) date stringio + public_suffix (6.0.1) puma (6.6.0) nio4r (~> 2.0) racc (1.8.1) @@ -194,17 +218,10 @@ GEM ffi (~> 1.0) rdoc (6.12.0) psych (>= 4.0.0) + regexp_parser (2.10.0) reline (0.6.0) io-console (~> 0.5) - rspec (3.13.0) - rspec-core (~> 3.13.0) - rspec-expectations (~> 3.13.0) - rspec-mocks (~> 3.13.0) - rspec-benchmark (0.6.0) - benchmark-malloc (~> 0.2) - benchmark-perf (~> 0.6) - benchmark-trend (~> 0.4) - rspec (>= 3.0) + rexml (3.4.1) rspec-core (3.13.3) rspec-support (~> 3.13.0) rspec-expectations (3.13.3) @@ -223,19 +240,39 @@ GEM rspec-support (~> 3.13) rspec-support (3.13.2) ruby-prof (1.7.1) + rubyzip (2.4.1) securerandom (0.4.1) + selenium-webdriver (4.10.0) + rexml (~> 3.2, >= 3.2.5) + rubyzip (>= 1.2.2, < 3.0) + websocket (~> 1.0) + sprockets (4.2.1) + concurrent-ruby (~> 1.0) + rack (>= 2.2.4, < 4) + sprockets-rails (3.5.2) + actionpack (>= 6.1) + activesupport (>= 6.1) + sprockets (>= 3.0.0) stackprof (0.2.27) stringio (3.1.2) thor (1.3.2) timeout (0.4.3) tzinfo (2.0.6) concurrent-ruby (~> 1.0) + uniform_notifier (1.16.0) uri (1.0.2) useragent (0.16.11) + webdrivers (5.3.1) + nokogiri (~> 1.6) + rubyzip (>= 1.3.0) + selenium-webdriver (~> 4.0, < 4.11) + websocket (1.2.11) websocket-driver (0.7.7) base64 websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) + xpath (3.2.0) + nokogiri (~> 1.8) zeitwerk (2.7.1) PLATFORMS @@ -245,19 +282,26 @@ DEPENDENCIES activerecord-import annotaterb bootsnap + bullet + capybara + dotenv-rails listen memory_profiler oj pg + pg_query + pghero pry puma rack-mini-profiler rails (~> 8.0.1) - rspec-benchmark rspec-rails ruby-prof + selenium-webdriver + sprockets-rails stackprof tzinfo-data + webdrivers RUBY VERSION ruby 3.4.1p0 diff --git a/app/controllers/trips_controller.rb b/app/controllers/trips_controller.rb index acb38be2..f8b28fb6 100644 --- a/app/controllers/trips_controller.rb +++ b/app/controllers/trips_controller.rb @@ -1,7 +1,9 @@ class TripsController < ApplicationController def index - @from = City.find_by_name!(params[:from]) - @to = City.find_by_name!(params[:to]) - @trips = Trip.where(from: @from, to: @to).order(:start_time) + @from = City.find_by!(name: params[:from]) + @to = City.find_by!(name: params[:to]) + @trips = Trip.eager_load(bus: :services).where(from: @from, to: @to).order(:start_time).to_a + + @count = @trips.size end end diff --git a/app/models/city.rb b/app/models/city.rb index 957290d8..d4d0b5e6 100644 --- a/app/models/city.rb +++ b/app/models/city.rb @@ -5,6 +5,10 @@ # id :bigint not null, primary key # name :string # +# Indexes +# +# index_cities_on_name (name) UNIQUE +# class City < ApplicationRecord validates :name, presence: true, uniqueness: true validate :name_has_no_spaces diff --git a/app/models/trip.rb b/app/models/trip.rb index 88521fef..c01ef9b4 100644 --- a/app/models/trip.rb +++ b/app/models/trip.rb @@ -10,6 +10,10 @@ # from_id :integer # to_id :integer # +# Indexes +# +# index_trips_on_from_id_and_to_id (from_id,to_id) +# class Trip < ApplicationRecord HHMM_REGEXP = /([0-1][0-9]|[2][0-3]):[0-5][0-9]/ @@ -37,7 +41,7 @@ def to_h bus: { number: bus.number, model: bus.model, - services: bus.services.map(&:name), + services: bus.services.pluck(:name), }, } end diff --git a/app/views/trips/_delimiter.html.erb b/app/views/trips/_delimiter.html.erb deleted file mode 100644 index 3f845ad0..00000000 --- a/app/views/trips/_delimiter.html.erb +++ /dev/null @@ -1 +0,0 @@ -==================================================== diff --git a/app/views/trips/_service.html.erb b/app/views/trips/_service.html.erb deleted file mode 100644 index 178ea8c0..00000000 --- a/app/views/trips/_service.html.erb +++ /dev/null @@ -1 +0,0 @@ -
  • <%= "#{service.name}" %>
  • diff --git a/app/views/trips/_services.html.erb b/app/views/trips/_services.html.erb index 2de639fc..6cfe1c57 100644 --- a/app/views/trips/_services.html.erb +++ b/app/views/trips/_services.html.erb @@ -1,6 +1,6 @@
  • Сервисы в автобусе:
  • diff --git a/app/views/trips/_trip.html.erb b/app/views/trips/_trip.html.erb index fa1de9aa..9c042687 100644 --- a/app/views/trips/_trip.html.erb +++ b/app/views/trips/_trip.html.erb @@ -3,3 +3,7 @@
  • <%= "В пути: #{trip.duration_minutes / 60}ч. #{trip.duration_minutes % 60}мин." %>
  • <%= "Цена: #{trip.price_cents / 100}р. #{trip.price_cents % 100}коп." %>
  • <%= "Автобус: #{trip.bus.model} №#{trip.bus.number}" %>
  • + +<% if trip.bus.services.present? %> + <%= render "services", services: trip.bus.services %> +<% end %> diff --git a/app/views/trips/index.html.erb b/app/views/trips/index.html.erb index a60bce41..6dc00ae7 100644 --- a/app/views/trips/index.html.erb +++ b/app/views/trips/index.html.erb @@ -2,15 +2,10 @@ <%= "Автобусы #{@from.name} – #{@to.name}" %>

    - <%= "В расписании #{@trips.count} рейсов" %> + <%= "В расписании #{@count} рейсов" %>

    -<% @trips.each do |trip| %> - - <%= render "delimiter" %> -<% end %> + diff --git a/case-study-template.md b/case-study-template.md index 03fc5a96..6031365e 100644 --- a/case-study-template.md +++ b/case-study-template.md @@ -1,5 +1,6 @@ # Case-study оптимизации +### А Импорт данных ## Актуальная проблема При импорте данных в нашем проекте из json файла в базу возникает проблема. @@ -9,23 +10,9 @@ Я решил исправить эту проблему, оптимизировав rake скрипт импорта данных. -### А Импорт данных ## Формирование метрики Для того, чтобы понимать, дают ли мои изменения положительный эффект на быстродействие программы я придумал использовать такую метрику: Импорт файла large.json должен занимать менее 1 минуты. -## Feedback-Loop -Для того, чтобы иметь возможность быстро проверять гипотезы я выстроил эффективный `feedback-loop`, который позволил мне получать обратную связь по эффективности сделанных изменений. - -Вот как я построил `feedback_loop`: -Добавил в скрипт бенчмарк для измерения продолжительности импорта. - -Вот как я построил `feedback_loop`: -- Запускаю имопрт данных. -- Запускаю профилировщик. -- Определяю главную точку роста. -- Вношу изменения. -- Защищаю изменения тестом. - Проверил работу исходной rake таски на разных объеиах данных: * {"3.4.1":{"gc":"enabled","time":8.45,"gc_count":79,"memory":"58 MB"}} - small.json @@ -57,3 +44,58 @@ Как результат, вложился в поставленную метрику в 1 мин для large.json - 2.6 сек. +### Б. Отображение расписаний +## Актуальная проблема +Загрузка страницы `/автобусы/Самара/Москва` занимает длительное время. +Нужно найти и устранить проблемы, замедляющие формирование этих страниц. + +## Формирование метрики +Придумал использовать метрику - время загрузки страницы на объеме данных `large.json` +Задаю целевой бюджет в 1 секунду. + +С помощью 'rack-mini-profiler' оценил время загрузки страницы в зависимости от объема данных: +* small.json - 516ms +* medium.json - 804ms +* large.json - 5800ms + +Добавил тесты для проверки конторллера и элементов страницы. + +## Поиск точек роста +- `rack-mini-profiler` показывает более 1700 sql запросов при рендере страницы + Проанализировав запросы видна N+1 проблема при загрузке Trip и Bus. + Подтверждаю свою догадку с помощью `bullet` + + Как результат оптимизирую запрос: + ```@trips = Trip.includes(bus: :services).where(from: @from, to: @to).order(:start_time)``` + + В итоге количество запросов сократилось до 7. `bullet` больше не ругается на N+1 + Время загрузки уменьшилось до 2600ms. + +- В `rack-mini-profiler` заметил много бесполезных ```<%= render "service", service: service %>``` + Заменил на
  • <%= service.name %>
  • + + Как результат сократил количество render операций. + Время загрузки уменьшилось до 1700ms. + +- То же самое проделал с 'delimeter'. + Время загрузки уменьшилось до 1600ms. + +- Использовал <%= render partial: "trip", collection: @trips, as: :trip %> + Время загрузки уменьшилось до 1300ms. + +- В `rack-mini-profiler` вижу два запроса в таблице cities по name. + Понимаю, что из-за количества запросов добавление индекса не очень скажется на перформансе. + Для интереса проверяю гепотизу и убеждаюсь что время загрузки не изменилось. + +- Вижу лишний запрос по количеству трипов. Количество мы можем взять из уже полученного массива поездок @count = @trips.size. + Время загрузки уменьшилось до 1100ms. + +- Добавил составной индекс длz trips на на [:from_id, :to_id] + Время загрузки практически неизменилось. + +- В логах `rack-mini-profiler` вижу что запросы на buses, buses_services и services выполняются отдельно. + Предполагаю что надо заменить includes на eager_load, чтобы избавиться от этого. + После замены гипотеза подтверждается, вместо 6 запросов - 3 запроса на страницу. + Время загрузки страницы - 830ms. Бюджет выполнен. + +- В заверешении проверяю тестом, что ничего не поломалось. \ No newline at end of file diff --git a/config/environments/development.rb b/config/environments/development.rb index bc3f8142..59d84671 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -1,4 +1,13 @@ Rails.application.configure do + config.after_initialize do + Bullet.enable = true + Bullet.alert = true + Bullet.bullet_logger = true + Bullet.console = true + Bullet.rails_logger = true + Bullet.add_footer = true + end + # Settings specified here will take precedence over those in config/application.rb. # In the development environment your application's code is reloaded on diff --git a/config/environments/test.rb b/config/environments/test.rb index 0a38fd3c..bc5971ab 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -1,4 +1,10 @@ Rails.application.configure do + config.after_initialize do + Bullet.enable = true + Bullet.bullet_logger = true + Bullet.raise = true # raise an error if n+1 query occurs + end + # Settings specified here will take precedence over those in config/application.rb. # The test environment is used exclusively to run your application's diff --git a/config/routes.rb b/config/routes.rb index 0bbefa7a..d59ea946 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -1,4 +1,6 @@ Rails.application.routes.draw do # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html + mount PgHero::Engine, at: "pghero" + get "автобусы/:from/:to" => "trips#index" end diff --git a/db/migrate/20250222155254_create_pghero_query_stats.rb b/db/migrate/20250222155254_create_pghero_query_stats.rb new file mode 100644 index 00000000..74aaaa9a --- /dev/null +++ b/db/migrate/20250222155254_create_pghero_query_stats.rb @@ -0,0 +1,15 @@ +class CreatePgheroQueryStats < ActiveRecord::Migration[8.0] + def change + create_table :pghero_query_stats do |t| + t.text :database + t.text :user + t.text :query + t.integer :query_hash, limit: 8 + t.float :total_time + t.integer :calls, limit: 8 + t.timestamp :captured_at + end + + add_index :pghero_query_stats, [:database, :captured_at] + end +end diff --git a/db/migrate/20250222160422_create_pghero_space_stats.rb b/db/migrate/20250222160422_create_pghero_space_stats.rb new file mode 100644 index 00000000..5592db37 --- /dev/null +++ b/db/migrate/20250222160422_create_pghero_space_stats.rb @@ -0,0 +1,13 @@ +class CreatePgheroSpaceStats < ActiveRecord::Migration[8.0] + def change + create_table :pghero_space_stats do |t| + t.text :database + t.text :schema + t.text :relation + t.integer :size, limit: 8 + t.timestamp :captured_at + end + + add_index :pghero_space_stats, [:database, :captured_at] + end +end diff --git a/db/migrate/20250222163633_remove_unused_indexes.rb b/db/migrate/20250222163633_remove_unused_indexes.rb new file mode 100644 index 00000000..ad0230eb --- /dev/null +++ b/db/migrate/20250222163633_remove_unused_indexes.rb @@ -0,0 +1,5 @@ +class RemoveUnusedIndexes < ActiveRecord::Migration[8.0] + def change + remove_index :pghero_query_stats, name: "index_pghero_query_stats_on_database_and_captured_at" + end +end diff --git a/db/migrate/20250223085937_add_index_to_cities_on_name.rb b/db/migrate/20250223085937_add_index_to_cities_on_name.rb new file mode 100644 index 00000000..c50f6dea --- /dev/null +++ b/db/migrate/20250223085937_add_index_to_cities_on_name.rb @@ -0,0 +1,5 @@ +class AddIndexToCitiesOnName < ActiveRecord::Migration[8.0] + def change + add_index :cities, :name, unique: true + end +end diff --git a/db/migrate/20250223092208_add_index_from_trips.rb b/db/migrate/20250223092208_add_index_from_trips.rb new file mode 100644 index 00000000..3a6d0487 --- /dev/null +++ b/db/migrate/20250223092208_add_index_from_trips.rb @@ -0,0 +1,5 @@ +class AddIndexFromTrips < ActiveRecord::Migration[8.0] + def change + add_index :trips, [:from_id, :to_id] + end +end diff --git a/db/schema.rb b/db/schema.rb index 40c9c8ae..147f17e9 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,9 +10,10 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2019_03_30_193044) do +ActiveRecord::Schema[8.0].define(version: 2025_02_23_092208) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" + enable_extension "pg_stat_statements" create_table "buses", force: :cascade do |t| t.string "number" @@ -26,6 +27,26 @@ create_table "cities", force: :cascade do |t| t.string "name" + t.index ["name"], name: "index_cities_on_name", unique: true + end + + create_table "pghero_query_stats", force: :cascade do |t| + t.text "database" + t.text "user" + t.text "query" + t.bigint "query_hash" + t.float "total_time" + t.bigint "calls" + t.datetime "captured_at", precision: nil + end + + create_table "pghero_space_stats", force: :cascade do |t| + t.text "database" + t.text "schema" + t.text "relation" + t.bigint "size" + t.datetime "captured_at", precision: nil + t.index ["database", "captured_at"], name: "index_pghero_space_stats_on_database_and_captured_at" end create_table "services", force: :cascade do |t| @@ -39,5 +60,6 @@ t.integer "duration_minutes" t.integer "price_cents" t.integer "bus_id" + t.index ["from_id", "to_id"], name: "index_trips_on_from_id_and_to_id" end end diff --git a/spec/controllers/trips_controller.rb b/spec/controllers/trips_controller.rb new file mode 100644 index 00000000..96fda96b --- /dev/null +++ b/spec/controllers/trips_controller.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe TripsController, type: :controller do + subject(:get_index) { get :index, params: { from: from_city.name, to: to_city.name } } + + let(:from_city) { City.create!(name: "Москва") } + let(:to_city) { City.create!(name: "Санкт-Петербург") } + let(:bus) { Bus.create!(number: "A123AA", model: "Икарус") } + + let(:trip) do + Trip.create!( + from: from_city, + to: to_city, + bus: bus, + start_time: "10:00", + duration_minutes: 300, + price_cents: 150_00 + ) + end + + it "should get index" do + get_index + expect(response).to be_successful + end +end diff --git a/spec/system/trips_spec.rb b/spec/system/trips_spec.rb new file mode 100644 index 00000000..504f6ae6 --- /dev/null +++ b/spec/system/trips_spec.rb @@ -0,0 +1,34 @@ +# spec/system/trips_spec.rb +require 'rails_helper' + +RSpec.describe "Trips Page", type: :system do + before do + end + + let!(:from_city) { City.create!(name: "Москва") } + let!(:to_city) { City.create!(name: "Санкт-Петербург") } + let!(:bus) { Bus.create!(number: "A123AA", model: "Икарус") } + + let!(:trip) do + Trip.create!( + from: from_city, + to: to_city, + bus: bus, + start_time: "10:00", + duration_minutes: 300, + price_cents: 150_00 + ) + end + + it "отображает index и нужные элементы" do + visit "/автобусы/#{from_city.name}/#{to_city.name}" + + expect(page).to have_selector("h1", text: "Автобусы Москва – Санкт-Петербург") + expect(page).to have_selector("h2", text: "В расписании 1 рейс") + expect(page).to have_selector("li", text: "Отправление: 10:00") + expect(page).to have_selector("li", text: "Прибытие: 15:00") + + expect(page).to have_selector("li", text: "Цена: 150р. 0коп.") + expect(page).to have_text("====================================================") + end +end