diff --git a/app/Models/CustomFilter.php b/app/Models/CustomFilter.php
index 1f0463e4a..976a0d84c 100644
--- a/app/Models/CustomFilter.php
+++ b/app/Models/CustomFilter.php
@@ -31,20 +31,8 @@ class CustomFilter extends Model
         'account',
     ];
 
-    const MAX_LIMIT = 20;
-
-    const MAX_KEYWORDS_PER_FILTER = 10;
-
     const MAX_STATUSES_PER_FILTER = 10;
 
-    const MAX_CONTENT_SCAN_LEN = 1000;
-
-    const MAX_KEYWORD_LEN = 40;
-
-    const MAX_PER_HOUR = 40;
-
-    const MAX_UPDATES_PER_HOUR = 40;
-
     const EXPIRATION_DURATIONS = [
         1800, // 30 minutes
         3600, // 1 hour
@@ -60,6 +48,22 @@ class CustomFilter extends Model
 
     const ACTION_BLUR = 2;
 
+    // Limits below are memoized: each is read from config('instance.custom_filters.*')
+    // by its getter on first use and then reused from the static property.
+    protected static ?int $maxContentScanLimit = null;
+
+    protected static ?int $maxFiltersPerUser = null;
+
+    protected static ?int $maxKeywordsPerFilter = null;
+
+    protected static ?int $maxKeywordsLength = null;
+
+    protected static ?int $maxPatternLength = null;
+
+    protected static ?int $maxCreatePerHour = null;
+
+    protected static ?int $maxUpdatesPerHour = null;
+
     public function account()
     {
         return $this->belongsTo(Profile::class, 'profile_id');
@@ -166,6 +170,11 @@ class CustomFilter extends Model
             $model->shouldInvalidateCache = true;
         });
 
+        static::updating(function ($model) {
+            $model->prepareContextForStorage();
+            $model->shouldInvalidateCache = true;
+        });
+
         static::deleting(function ($model) {
             $model->shouldInvalidateCache = true;
         });
@@ -197,6 +206,69 @@ class CustomFilter extends Model
         Cache::forget("filters:v3:{$this->profile_id}");
     }
 
+    public static function getMaxContentScanLimit(): int
+    {
+        if (self::$maxContentScanLimit === null) {
+            self::$maxContentScanLimit = (int) config('instance.custom_filters.max_content_scan_limit', 2500);
+        }
+
+        return self::$maxContentScanLimit;
+    }
+
+    public static function getMaxFiltersPerUser(): int
+    {
+        if (self::$maxFiltersPerUser === null) {
+            self::$maxFiltersPerUser = (int) config('instance.custom_filters.max_filters_per_user', 20);
+        }
+
+        return self::$maxFiltersPerUser;
+    }
+
+    public static function getMaxKeywordsPerFilter(): int
+    {
+        if (self::$maxKeywordsPerFilter === null) {
+            self::$maxKeywordsPerFilter = (int) config('instance.custom_filters.max_keywords_per_filter', 10);
+        }
+
+        return self::$maxKeywordsPerFilter;
+    }
+
+    public static function getMaxKeywordLength(): int
+    {
+        if (self::$maxKeywordsLength === null) {
+            self::$maxKeywordsLength = (int) config('instance.custom_filters.max_keyword_length', 40);
+        }
+
+        return self::$maxKeywordsLength;
+    }
+
+    public static function getMaxPatternLength(): int
+    {
+        if (self::$maxPatternLength === null) {
+            self::$maxPatternLength = (int) config('instance.custom_filters.max_pattern_length', 10000);
+        }
+
+        return self::$maxPatternLength;
+    }
+
+    public static function getMaxCreatePerHour(): int
+    {
+        if (self::$maxCreatePerHour === null) {
+            self::$maxCreatePerHour = (int) config('instance.custom_filters.max_create_per_hour', 20);
+        }
+
+        return self::$maxCreatePerHour;
+    }
+
+    public static function getMaxUpdatesPerHour(): int
+    {
+        if (self::$maxUpdatesPerHour === null) {
+            self::$maxUpdatesPerHour = (int) config('instance.custom_filters.max_updates_per_hour', 40);
+        }
+
+        return self::$maxUpdatesPerHour;
+    }
+
     /**
      * Get cached filters for an account with simplified, secure approach
      *
@@ -219,7 +291,7 @@ class CustomFilter extends Model
                     return;
                 }
 
-                $maxPatternsPerFilter = self::MAX_KEYWORDS_PER_FILTER;
+                $maxPatternsPerFilter = self::getMaxKeywordsPerFilter();
                 $keywordsToProcess = $keywords->take($maxPatternsPerFilter);
 
                 $regexPatterns = $keywordsToProcess->map(function ($keyword) {
@@ -237,7 +309,7 @@ class CustomFilter extends Model
                 }
 
                 $combinedPattern = implode('|', $regexPatterns);
-                $maxPatternLength = self::MAX_KEYWORD_LEN;
+                $maxPatternLength = self::getMaxPatternLength();
                 if (strlen($combinedPattern) > $maxPatternLength) {
                     $combinedPattern = substr($combinedPattern, 0, $maxPatternLength);
                 }
@@ -248,24 +320,24 @@ class CustomFilter extends Model
                 ];
             });
 
-            $statusFilters = CustomFilterStatus::with(['customFilter' => function ($query) use ($profileId) {
-                $query->unexpired()->where('profile_id', $profileId);
-            }])->get();
+            // $statusFilters = CustomFilterStatus::with(['customFilter' => function ($query) use ($profileId) {
+            //     $query->unexpired()->where('profile_id', $profileId);
+            // }])->get();
 
-            $statusFilters->groupBy('custom_filter_id')->each(function ($statuses, $filterId) use (&$filtersHash) {
-                $filter = $statuses->first()->customFilter;
+            // $statusFilters->groupBy('custom_filter_id')->each(function ($statuses, $filterId) use (&$filtersHash) {
+            //     $filter = $statuses->first()->customFilter;
 
-                if (! $filter) {
-                    return;
-                }
+            //     if (! $filter) {
+            //         return;
+            //     }
 
-                if (! isset($filtersHash[$filterId])) {
-                    $filtersHash[$filterId] = ['filter' => $filter];
-                }
+            //     if (! isset($filtersHash[$filterId])) {
+            //         $filtersHash[$filterId] = ['filter' => $filter];
+            //     }
 
-                $maxStatusIds = self::MAX_STATUSES_PER_FILTER;
-                $filtersHash[$filterId]['status_ids'] = $statuses->take($maxStatusIds)->pluck('status_id')->toArray();
-            });
+            //     $maxStatusIds = self::MAX_STATUSES_PER_FILTER;
+            //     $filtersHash[$filterId]['status_ids'] = $statuses->take($maxStatusIds)->pluck('status_id')->toArray();
+            // });
 
             return array_map(function ($item) {
                 $filter = $item['filter'];
@@ -300,7 +372,7 @@ class CustomFilter extends Model
             if (isset($rules['keywords'])) {
                 $text = strip_tags($status['content']);
 
-                $maxContentLength = self::MAX_CONTENT_SCAN_LEN;
+                $maxContentLength = self::getMaxContentScanLimit();
                 if (mb_strlen($text) > $maxContentLength) {
                     $text = mb_substr($text, 0, $maxContentLength);
                 }
@@ -308,7 +380,7 @@ class CustomFilter extends Model
                 try {
                     preg_match_all($rules['keywords'], $text, $matches, PREG_PATTERN_ORDER, 0);
                     if (! empty($matches[0])) {
-                        $maxReportedMatches = 10;
+                        $maxReportedMatches = (int) config('instance.custom_filters.max_reported_matches', 10);
                         $keywordMatches = array_slice($matches[0], 0, $maxReportedMatches);
                     }
                 } catch (\Throwable $e) {
@@ -318,15 +390,15 @@ class CustomFilter extends Model
                 }
             }
 
-            if (isset($rules['status_ids'])) {
-                $statusId = $status->id;
-                $reblogId = $status->reblog_of_id ?? null;
+            // if (isset($rules['status_ids'])) {
+            //     $statusId = $status->id;
+            //     $reblogId = $status->reblog_of_id ?? null;
 
-                $matchingIds = array_intersect($rules['status_ids'], array_filter([$statusId, $reblogId]));
-                if (! empty($matchingIds)) {
-                    $statusMatches = $matchingIds;
-                }
-            }
+            //     $matchingIds = array_intersect($rules['status_ids'], array_filter([$statusId, $reblogId]));
+            //     if (! empty($matchingIds)) {
+            //         $statusMatches = $matchingIds;
+            //     }
+            // }
 
             if (! empty($keywordMatches) || ! empty($statusMatches)) {
                 $results[] = [
diff --git a/config/instance.php b/config/instance.php
index 82402e057..0a3277421 100644
--- a/config/instance.php
+++ b/config/instance.php
@@ -190,4 +190,78 @@ return [
     'allow_new_account_dms' => env('INSTANCE_ALLOW_NEW_DMS', true),
 
     'total_count_estimate' => env('INSTANCE_TOTAL_POSTS_COUNT_ESTIMATE', false),
+
+    'custom_filters' => [
+        /*
+         * The maximum number of characters from a status that will be scanned
+         * for filter matching. Scanning too many characters can hurt performance,
+         * so this limit ensures that only the most relevant portion of a status is processed.
+         *
+         * For remote statuses, you might want to increase this value if you expect
+         * important content to appear later in long posts.
+         */
+        'max_content_scan_limit' => env('PF_CF_CONTENT_SCAN_LIMIT', 2500),
+
+        /*
+         * The maximum number of filters a single user can create.
+         * Limiting the number of filters per user helps prevent abuse and
+         * ensures that the filtering system remains performant.
+         */
+        'max_filters_per_user' => env('PF_CF_MAX_FILTERS_PER_USER', 20),
+
+        /*
+         * The maximum number of keywords that can be associated with a single filter.
+         * This limit helps control the complexity of the generated regular expressions
+         * and protects against potential performance issues during content scanning.
+         */
+        'max_keywords_per_filter' => env('PF_CF_MAX_KEYWORDS_PER_FILTER', 10),
+
+        /*
+         * The maximum length allowed for each keyword in a filter.
+         * Limiting keyword length not only curtails the size of the regex patterns created,
+         * but also guards against potential abuse where excessively long keywords might
+         * negatively impact matching performance or lead to unintended behavior.
+         */
+        'max_keyword_length' => env('PF_CF_MAX_KEYWORD_LENGTH', 40),
+
+        /*
+         * The maximum allowed length for the combined regex pattern.
+         * When constructing a regex that matches multiple filter keywords, each keyword
+         * (after escaping and adding boundaries) contributes to the total pattern length.
+         *
+         * This value is set to 10000 by default. If you increase either the number of keywords
+         * per filter or the maximum length allowed for each keyword, consider increasing this
+         * limit accordingly so that the final regex pattern can accommodate the additional length
+         * without being truncated or causing performance issues.
+         */
+        'max_pattern_length' => env('PF_CF_MAX_PATTERN_LENGTH', 10000),
+
+        /*
+         * The maximum number of keyword matches to report for a given status.
+         * When a filter is applied to a status, the matching process may find multiple occurrences
+         * of a keyword. This value limits the number of matches that are reported back,
+         * which helps manage output volume and processing overhead.
+         *
+         * The default is set to 10, but you can adjust this value through your environment configuration.
+         */
+        'max_reported_matches' => env('PF_CF_MAX_REPORTED_MATCHES', 10),
+
+        /*
+         * The maximum number of filter creation operations allowed per hour for a non-admin user.
+         * This rate limit prevents abuse by restricting how many filters a normal user can create
+         * within one hour. Admin users are exempt from this limit.
+         *
+         * Default is 20 creations per hour.
+         */
+        'max_create_per_hour' => env('PF_CF_MAX_CREATE_PER_HOUR', 20),
+
+        /*
+         * The maximum number of filter update operations allowed per hour for a non-admin user.
+         * This rate limit is designed to prevent abuse by limiting how many times a normal user
+         * can update their filters within one hour. Admin users are not subject to these limits.
+         *
+         * Default is 40 updates per hour.
+         */
+        'max_updates_per_hour' => env('PF_CF_MAX_UPDATES_PER_HOUR', 40),
+    ],
 ];