Improve performance of follow recommendation scheduler (#16159)
Express follow_recommendations in terms of account_summaries rather than accounts, integrate filters that are unconditionally used, and materialize the resulting view. This should result in the bulk of the computation being performed only once instead of **once per recommendation language**.shrike
parent
351c744590
commit
d9ae3db8d5
|
@ -14,9 +14,11 @@ class FollowRecommendation < ApplicationRecord
|
||||||
belongs_to :account_summary, foreign_key: :account_id
|
belongs_to :account_summary, foreign_key: :account_id
|
||||||
belongs_to :account, foreign_key: :account_id
|
belongs_to :account, foreign_key: :account_id
|
||||||
|
|
||||||
scope :safe, -> { joins(:account_summary).merge(AccountSummary.safe) }
|
|
||||||
scope :localized, ->(locale) { joins(:account_summary).merge(AccountSummary.localized(locale)) }
|
scope :localized, ->(locale) { joins(:account_summary).merge(AccountSummary.localized(locale)) }
|
||||||
scope :filtered, -> { joins(:account_summary).merge(AccountSummary.filtered) }
|
|
||||||
|
def self.refresh
|
||||||
|
Scenic.database.refresh_materialized_view(table_name, concurrently: true, cascade: false)
|
||||||
|
end
|
||||||
|
|
||||||
def readonly?
|
def readonly?
|
||||||
true
|
true
|
||||||
|
|
|
@ -14,13 +14,14 @@ class Scheduler::FollowRecommendationsScheduler
|
||||||
def perform
|
def perform
|
||||||
# Maintaining a materialized view speeds-up subsequent queries significantly
|
# Maintaining a materialized view speeds-up subsequent queries significantly
|
||||||
AccountSummary.refresh
|
AccountSummary.refresh
|
||||||
|
FollowRecommendation.refresh
|
||||||
|
|
||||||
fallback_recommendations = FollowRecommendation.safe.filtered.limit(SET_SIZE).index_by(&:account_id)
|
fallback_recommendations = FollowRecommendation.limit(SET_SIZE).index_by(&:account_id)
|
||||||
|
|
||||||
I18n.available_locales.each do |locale|
|
I18n.available_locales.each do |locale|
|
||||||
recommendations = begin
|
recommendations = begin
|
||||||
if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist
|
if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist
|
||||||
FollowRecommendation.safe.filtered.localized(locale).limit(SET_SIZE).index_by(&:account_id)
|
FollowRecommendation.localized(locale).limit(SET_SIZE).index_by(&:account_id)
|
||||||
else
|
else
|
||||||
{}
|
{}
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
class UpdateFollowRecommendationsToVersion2 < ActiveRecord::Migration[6.1]
|
||||||
|
# We're switching from a normal to a materialized view so we need
|
||||||
|
# custom `up` and `down` paths.
|
||||||
|
|
||||||
|
def up
|
||||||
|
drop_view :follow_recommendations
|
||||||
|
create_view :follow_recommendations, version: 2, materialized: true
|
||||||
|
|
||||||
|
# To be able to refresh the view concurrently,
|
||||||
|
# at least one unique index is required
|
||||||
|
safety_assured { add_index :follow_recommendations, :account_id, unique: true }
|
||||||
|
end
|
||||||
|
|
||||||
|
def down
|
||||||
|
drop_view :follow_recommendations, materialized: true
|
||||||
|
create_view :follow_recommendations, version: 1
|
||||||
|
end
|
||||||
|
end
|
28
db/schema.rb
28
db/schema.rb
|
@ -10,7 +10,7 @@
|
||||||
#
|
#
|
||||||
# It's strongly recommended that you check this file into your version control system.
|
# It's strongly recommended that you check this file into your version control system.
|
||||||
|
|
||||||
ActiveRecord::Schema.define(version: 2021_04_25_135952) do
|
ActiveRecord::Schema.define(version: 2021_05_05_174616) do
|
||||||
|
|
||||||
# These are extensions that must be enabled in order to support this database
|
# These are extensions that must be enabled in order to support this database
|
||||||
enable_extension "plpgsql"
|
enable_extension "plpgsql"
|
||||||
|
@ -1114,30 +1114,34 @@ ActiveRecord::Schema.define(version: 2021_04_25_135952) do
|
||||||
SQL
|
SQL
|
||||||
add_index "account_summaries", ["account_id"], name: "index_account_summaries_on_account_id", unique: true
|
add_index "account_summaries", ["account_id"], name: "index_account_summaries_on_account_id", unique: true
|
||||||
|
|
||||||
create_view "follow_recommendations", sql_definition: <<-SQL
|
create_view "follow_recommendations", materialized: true, sql_definition: <<-SQL
|
||||||
SELECT t0.account_id,
|
SELECT t0.account_id,
|
||||||
sum(t0.rank) AS rank,
|
sum(t0.rank) AS rank,
|
||||||
array_agg(t0.reason) AS reason
|
array_agg(t0.reason) AS reason
|
||||||
FROM ( SELECT accounts.id AS account_id,
|
FROM ( SELECT account_summaries.account_id,
|
||||||
((count(follows.id))::numeric / (1.0 + (count(follows.id))::numeric)) AS rank,
|
((count(follows.id))::numeric / (1.0 + (count(follows.id))::numeric)) AS rank,
|
||||||
'most_followed'::text AS reason
|
'most_followed'::text AS reason
|
||||||
FROM ((follows
|
FROM (((follows
|
||||||
JOIN accounts ON ((accounts.id = follows.target_account_id)))
|
JOIN account_summaries ON ((account_summaries.account_id = follows.target_account_id)))
|
||||||
JOIN users ON ((users.account_id = follows.account_id)))
|
JOIN users ON ((users.account_id = follows.account_id)))
|
||||||
WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true))
|
LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = follows.target_account_id)))
|
||||||
GROUP BY accounts.id
|
WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL))
|
||||||
|
GROUP BY account_summaries.account_id
|
||||||
HAVING (count(follows.id) >= 5)
|
HAVING (count(follows.id) >= 5)
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT accounts.id AS account_id,
|
SELECT account_summaries.account_id,
|
||||||
(sum((status_stats.reblogs_count + status_stats.favourites_count)) / (1.0 + sum((status_stats.reblogs_count + status_stats.favourites_count)))) AS rank,
|
(sum((status_stats.reblogs_count + status_stats.favourites_count)) / (1.0 + sum((status_stats.reblogs_count + status_stats.favourites_count)))) AS rank,
|
||||||
'most_interactions'::text AS reason
|
'most_interactions'::text AS reason
|
||||||
FROM ((status_stats
|
FROM (((status_stats
|
||||||
JOIN statuses ON ((statuses.id = status_stats.status_id)))
|
JOIN statuses ON ((statuses.id = status_stats.status_id)))
|
||||||
JOIN accounts ON ((accounts.id = statuses.account_id)))
|
JOIN account_summaries ON ((account_summaries.account_id = statuses.account_id)))
|
||||||
WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true))
|
LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = statuses.account_id)))
|
||||||
GROUP BY accounts.id
|
WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL))
|
||||||
|
GROUP BY account_summaries.account_id
|
||||||
HAVING (sum((status_stats.reblogs_count + status_stats.favourites_count)) >= (5)::numeric)) t0
|
HAVING (sum((status_stats.reblogs_count + status_stats.favourites_count)) >= (5)::numeric)) t0
|
||||||
GROUP BY t0.account_id
|
GROUP BY t0.account_id
|
||||||
ORDER BY (sum(t0.rank)) DESC;
|
ORDER BY (sum(t0.rank)) DESC;
|
||||||
SQL
|
SQL
|
||||||
|
add_index "follow_recommendations", ["account_id"], name: "index_follow_recommendations_on_account_id", unique: true
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
SELECT
|
||||||
|
account_id,
|
||||||
|
sum(rank) AS rank,
|
||||||
|
array_agg(reason) AS reason
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
account_summaries.account_id AS account_id,
|
||||||
|
count(follows.id) / (1.0 + count(follows.id)) AS rank,
|
||||||
|
'most_followed' AS reason
|
||||||
|
FROM follows
|
||||||
|
INNER JOIN account_summaries ON account_summaries.account_id = follows.target_account_id
|
||||||
|
INNER JOIN users ON users.account_id = follows.account_id
|
||||||
|
LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = follows.target_account_id
|
||||||
|
WHERE users.current_sign_in_at >= (now() - interval '30 days')
|
||||||
|
AND account_summaries.sensitive = 'f'
|
||||||
|
AND follow_recommendation_suppressions.id IS NULL
|
||||||
|
GROUP BY account_summaries.account_id
|
||||||
|
HAVING count(follows.id) >= 5
|
||||||
|
UNION ALL
|
||||||
|
SELECT account_summaries.account_id AS account_id,
|
||||||
|
sum(reblogs_count + favourites_count) / (1.0 + sum(reblogs_count + favourites_count)) AS rank,
|
||||||
|
'most_interactions' AS reason
|
||||||
|
FROM status_stats
|
||||||
|
INNER JOIN statuses ON statuses.id = status_stats.status_id
|
||||||
|
INNER JOIN account_summaries ON account_summaries.account_id = statuses.account_id
|
||||||
|
LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = statuses.account_id
|
||||||
|
WHERE statuses.id >= ((date_part('epoch', now() - interval '30 days') * 1000)::bigint << 16)
|
||||||
|
AND account_summaries.sensitive = 'f'
|
||||||
|
AND follow_recommendation_suppressions.id IS NULL
|
||||||
|
GROUP BY account_summaries.account_id
|
||||||
|
HAVING sum(reblogs_count + favourites_count) >= 5
|
||||||
|
) t0
|
||||||
|
GROUP BY account_id
|
||||||
|
ORDER BY rank DESC
|
Loading…
Reference in New Issue