Add `tootctl media remove-orphans` (#12568)

pull/12569/head
Eugen Rochko 5 years ago committed by GitHub
parent 6d7daf6154
commit f3d232381d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -167,6 +167,18 @@ class MediaAttachment < ApplicationRecord
audio? || video?
end
# Tells whether +other_file_name+ names this attachment's own file or one of
# its style variants.
#
# A variant shares the base name (name without extension) of the attachment's
# file and has an extension equal to a format declared by one of the
# attachment's styles.
#
# @param other_file_name [String] file name to compare against this attachment
# @return [Boolean]
def variant?(other_file_name)
  return true if file_file_name == other_file_name

  # Styles that do not declare a format contribute nothing to the check.
  style_formats = file.styles.values.map { |style| style.format }.compact
  return false if style_formats.empty?

  other_extension = File.extname(other_file_name)
  return false unless style_formats.include?(other_extension.delete('.'))

  own_stem = File.basename(file_file_name, File.extname(file_file_name))
  File.basename(other_file_name, other_extension) == own_stem
end
# Returns this record's shortcode as its parameter representation.
def to_param
  shortcode
end

@ -89,7 +89,7 @@ else
# Local filesystem storage defaults.
# Files are stored under PAPERCLIP_ROOT_PATH (default: :rails_root/public/system)
# and addressed under PAPERCLIP_ROOT_URL (default: /system); both are followed by
# :class/:attachment/:id_partition/:style/:filename.
Paperclip::Attachment.default_options.merge!(
  storage: :filesystem,
  use_timestamp: true,
  # Built with File.join so the configured root and the segments are joined by exactly one separator.
  path: File.join(ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')), ':class', ':attachment', ':id_partition', ':style', ':filename'),
  url: ENV.fetch('PAPERCLIP_ROOT_URL', '/system') + '/:class/:attachment/:id_partition/:style/:filename',
)
end

@ -44,6 +44,83 @@ module Mastodon
say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
end
# Command-line options and help text for the `remove-orphans` command defined below.
#   start_after - used by the S3 branch as the key after which object listing starts
#   dry_run     - when true, orphans are counted and logged but nothing is deleted
option :start_after
option :dry_run, type: :boolean, default: false
desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
long_desc <<~LONG_DESC
Scans file storage for files that do not belong to existing media attachments. Because this operation
requires iterating over every single file individually, it will be slow.
Please mind that some storage providers charge for the necessary API requests to list objects.
LONG_DESC
# Removes stored files under media_attachments/files/ that have no matching
# MediaAttachment record, or whose file name is not a variant of that record's
# file (see MediaAttachment#variant?).
#
# Handles the :s3 and :filesystem Paperclip storage drivers; for :fog it prints
# an error and exits with status 1. With the dry_run option nothing is deleted,
# but orphans are still counted and logged. The start_after option is the S3 key
# after which listing starts.
def remove_orphans
  progress        = create_progress_bar(nil)
  reclaimed_bytes = 0
  removed         = 0
  dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''

  case Paperclip::Attachment.default_options[:storage]
  when :s3
    paperclip_instance = MediaAttachment.new.file
    s3_interface       = paperclip_instance.s3_interface
    bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
    last_key           = options[:start_after]

    loop do
      # Page through the bucket 1000 objects at a time, resuming after the last key seen.
      objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).to_a

      break if objects.empty?

      last_key = objects.last.key

      # Keys are expected to look like media_attachments/files/<id partition>/<style>/<filename>.
      # Joining the segments between the two-part prefix and the filename and calling
      # to_i reads the leading digits of the result, i.e. the attachment ID.
      attachment_ids = objects.map { |object| object.key.split('/')[2..-2].join.to_i }

      # One query per page instead of one per object.
      attachments_map = MediaAttachment.where(id: attachment_ids).index_by(&:id)

      objects.zip(attachment_ids).each do |object, attachment_id|
        filename   = object.key.split('/').last
        attachment = attachments_map[attachment_id]

        progress.increment

        next unless attachment.nil? || !attachment.variant?(filename)

        reclaimed_bytes += object.size
        removed += 1
        object.delete unless options[:dry_run]
        progress.log("Found and removed orphan: #{object.key}")
      end
    end
  when :fog
    say('The fog storage driver is not supported for this operation at this time', :red)
    exit(1)
  when :filesystem
    require 'find'

    # The filesystem storage path is configured from PAPERCLIP_ROOT_PATH, so the
    # scan has to read the same variable or a custom storage root is never visited.
    root_path  = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)
    files_root = File.join(root_path, 'media_attachments', 'files')

    Find.find(files_root) do |path|
      next if File.directory?(path)

      # Derive the key relative to the directory being walked instead of textually
      # stripping root_path: File.join normalizes separators, so a root configured
      # with a trailing slash would never match as a prefix, every ID would parse
      # as 0 and every file would be deleted as an orphan.
      segments      = path[files_root.length..-1].split(File::SEPARATOR).reject(&:empty?)
      key           = File.join('media_attachments', 'files', *segments)
      attachment_id = segments[0..-2].join.to_i
      filename      = segments.last
      attachment    = MediaAttachment.find_by(id: attachment_id)

      progress.increment

      next unless attachment.nil? || !attachment.variant?(filename)

      reclaimed_bytes += File.size(path)
      removed += 1
      File.delete(path) unless options[:dry_run]
      progress.log("Found and removed orphan: #{key}")
    end
  end

  # The bar was created without a known total (nil), so set it to the number
  # of files actually visited before finishing.
  progress.total = progress.progress
  progress.finish

  say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
end
option :account, type: :string
option :domain, type: :string
option :status, type: :numeric

Loading…
Cancel
Save