Add retry for failed media downloads and `tootctl media refresh` (#11775)

shrike
Eugen Rochko 2019-09-10 15:29:12 +02:00 committed by GitHub
parent 8674814825
commit 031ca25014
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 92 additions and 16 deletions

View File

@ -189,22 +189,25 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
media_attachments = [] media_attachments = []
as_array(@object['attachment']).each do |attachment| as_array(@object['attachment']).each do |attachment|
next if attachment['url'].blank? next if attachment['url'].blank? || media_attachments.size >= 4
href = Addressable::URI.parse(attachment['url']).normalize.to_s begin
media_attachment = MediaAttachment.create(account: @account, remote_url: href, description: attachment['name'].presence, focus: attachment['focalPoint'], blurhash: supported_blurhash?(attachment['blurhash']) ? attachment['blurhash'] : nil) href = Addressable::URI.parse(attachment['url']).normalize.to_s
media_attachments << media_attachment media_attachment = MediaAttachment.create(account: @account, remote_url: href, description: attachment['name'].presence, focus: attachment['focalPoint'], blurhash: supported_blurhash?(attachment['blurhash']) ? attachment['blurhash'] : nil)
media_attachments << media_attachment
next if unsupported_media_type?(attachment['mediaType']) || skip_download? next if unsupported_media_type?(attachment['mediaType']) || skip_download?
media_attachment.file_remote_url = href media_attachment.file_remote_url = href
media_attachment.save media_attachment.save
rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError
RedownloadMediaWorker.perform_in(rand(30..600).seconds, media_attachment.id)
end
end end
media_attachments media_attachments
rescue Addressable::URI::InvalidURIError => e rescue Addressable::URI::InvalidURIError => e
Rails.logger.debug e Rails.logger.debug "Invalid URL in attachment: #{e}"
media_attachments media_attachments
end end

View File

@ -4,7 +4,7 @@ module Remotable
extend ActiveSupport::Concern extend ActiveSupport::Concern
class_methods do class_methods do
def remotable_attachment(attachment_name, limit) def remotable_attachment(attachment_name, limit, suppress_errors: true)
attribute_name = "#{attachment_name}_remote_url".to_sym attribute_name = "#{attachment_name}_remote_url".to_sym
method_name = "#{attribute_name}=".to_sym method_name = "#{attribute_name}=".to_sym
alt_method_name = "reset_#{attachment_name}!".to_sym alt_method_name = "reset_#{attachment_name}!".to_sym
@ -22,7 +22,7 @@ module Remotable
begin begin
Request.new(:get, url).perform do |response| Request.new(:get, url).perform do |response|
next if response.code != 200 raise Mastodon::UnexpectedResponseError, response unless (200...300).cover?(response.code)
content_type = parse_content_type(response.headers.get('content-type').last) content_type = parse_content_type(response.headers.get('content-type').last)
extname = detect_extname_from_content_type(content_type) extname = detect_extname_from_content_type(content_type)
@ -41,11 +41,11 @@ module Remotable
self[attribute_name] = url if has_attribute?(attribute_name) self[attribute_name] = url if has_attribute?(attribute_name)
end end
rescue HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError, Paperclip::Errors::NotIdentifiedByImageMagickError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError => e
Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}"
raise e unless suppress_errors
rescue Paperclip::Errors::NotIdentifiedByImageMagickError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, Paperclip::Error, Mastodon::DimensionsValidationError => e
Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}" Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}"
nil
rescue Paperclip::Error, Mastodon::DimensionsValidationError => e
Rails.logger.debug "Error processing remote #{attachment_name}: #{e}"
nil nil
end end
end end

View File

@ -118,7 +118,7 @@ class MediaAttachment < ApplicationRecord
validates_attachment_content_type :file, content_type: IMAGE_MIME_TYPES + VIDEO_MIME_TYPES + AUDIO_MIME_TYPES validates_attachment_content_type :file, content_type: IMAGE_MIME_TYPES + VIDEO_MIME_TYPES + AUDIO_MIME_TYPES
validates_attachment_size :file, less_than: IMAGE_LIMIT, unless: :larger_media_format? validates_attachment_size :file, less_than: IMAGE_LIMIT, unless: :larger_media_format?
validates_attachment_size :file, less_than: VIDEO_LIMIT, if: :larger_media_format? validates_attachment_size :file, less_than: VIDEO_LIMIT, if: :larger_media_format?
remotable_attachment :file, VIDEO_LIMIT remotable_attachment :file, VIDEO_LIMIT, suppress_errors: false
include Attachmentable include Attachmentable

View File

@ -0,0 +1,19 @@
# frozen_string_literal: true
# Background job that resets the file of a single media attachment so that it
# is fetched again from its remote URL. `reset_file!` is presumably the
# "reset_<attachment>!" helper generated by Remotable — confirm there.
class RedownloadMediaWorker
  include Sidekiq::Worker
  include ExponentialBackoff

  sidekiq_options queue: 'pull', retry: 3

  # @param id [Integer] id of the MediaAttachment record to process
  # @return [Boolean, nil] result of `save`, nil when the attachment has no
  #   remote URL, or true when the record no longer exists
  def perform(id)
    attachment = MediaAttachment.find(id)

    # Only attachments that have a remote URL are processed.
    if attachment.remote_url.present?
      attachment.reset_file!
      attachment.save
    end
  rescue ActiveRecord::RecordNotFound
    # The attachment was deleted before the job ran; nothing left to do.
    true
  end
end

View File

@ -43,5 +43,59 @@ module Mastodon
say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true) say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
end end
# Thor declarations for the `refresh` command. --concurrency and --verbose are
# not read in this method; presumably parallelize_with_progress consumes them
# (verify against the helper).
option :account, type: :string
option :domain, type: :string
option :status, type: :numeric
option :concurrency, type: :numeric, default: 5, aliases: [:c]
option :verbose, type: :boolean, default: false, aliases: [:v]
option :dry_run, type: :boolean, default: false
desc 'refresh', 'Fetch remote media files'
# The help text below is kept flush-left so the emitted string is unchanged.
long_desc <<-DESC
Re-downloads media attachments from other servers. You must specify the
source of media attachments with one of the following options:
Use the --status option to download attachments from a specific status,
using the status local numeric ID.
Use the --account option to download attachments from a specific account,
using username@domain handle of the account.
Use the --domain option to download attachments from a specific domain.
DESC
# Re-downloads remote media attachments selected by --status, --account or
# --domain. The selectors are checked in that order and the first one given
# wins.
#
# Exits with status 1 (after printing a message) when no selector is given or
# when the --account handle does not match a known remote account. With
# --dry-run the attachments are enumerated but nothing is reset or saved.
def refresh
  dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

  if options[:status]
    scope = MediaAttachment.where(status_id: options[:status])
  elsif options[:account]
    # Split the handle passed on the command line (username@domain).
    username, domain = options[:account].split('@')
    account = Account.find_remote(username, domain)

    if account.nil?
      say('No such account', :red)
      exit(1)
    end

    scope = MediaAttachment.where(account_id: account.id)
  elsif options[:domain]
    scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
  else
    # No selector given: explain why the command fails instead of exiting silently.
    say('Specify the source of media attachments with --status, --account or --domain', :red)
    exit(1)
  end

  # The block's return value (the file size) is presumably accumulated into
  # `aggregate` by parallelize_with_progress — confirm against the helper.
  processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
    # Attachments without a remote URL cannot be re-downloaded.
    next if media_attachment.remote_url.blank?

    unless options[:dry_run]
      media_attachment.reset_file!
      media_attachment.save
    end

    media_attachment.file_file_size
  end

  say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
end
end end
end end