Update CustomFilter model: add configurable limits (max_content_scan_limit, max_filters_per_user, max_keywords_per_filter, max_keyword_length, max_pattern_length, max_reported_matches, max_create_per_hour, max_updates_per_hour) so admins have more control over this feature

pull/5928/head
Daniel Supernault 5 months ago
parent 578718c606
commit 77044ba193
No known key found for this signature in database
GPG Key ID: 23740873EE6F76A1

@ -31,20 +31,8 @@ class CustomFilter extends Model
'account',
];
const MAX_LIMIT = 20;
const MAX_KEYWORDS_PER_FILTER = 10;
const MAX_STATUSES_PER_FILTER = 10;
const MAX_CONTENT_SCAN_LEN = 1000;
const MAX_KEYWORD_LEN = 40;
const MAX_PER_HOUR = 40;
const MAX_UPDATES_PER_HOUR = 40;
const EXPIRATION_DURATIONS = [
1800, // 30 minutes
3600, // 1 hour
@ -60,6 +48,20 @@ class CustomFilter extends Model
const ACTION_BLUR = 2;
// Memoized copies of the instance.custom_filters.* limits, held in static state.
// Each stays null until its getter reads the value from config for the first time.

// Filled by getMaxContentScanLimit().
protected static ?int $maxContentScanLimit = null;
// Filled by getMaxFiltersPerUser().
protected static ?int $maxFiltersPerUser = null;
// Filled by getMaxKeywordsPerFilter().
protected static ?int $maxKeywordsPerFilter = null;
// Filled by getMaxKeywordLength() (note: the property name is plural, the getter is singular).
protected static ?int $maxKeywordsLength = null;
// Filled by getMaxPatternLength().
protected static ?int $maxPatternLength = null;
// Filled by getMaxCreatePerHour().
protected static ?int $maxCreatePerHour = null;
// Filled by getMaxUpdatesPerHour().
protected static ?int $maxUpdatesPerHour = null;
public function account()
{
return $this->belongsTo(Profile::class, 'profile_id');
@ -166,6 +168,11 @@ class CustomFilter extends Model
$model->shouldInvalidateCache = true;
});
static::updating(function ($model) {
$model->prepareContextForStorage();
$model->shouldInvalidateCache = true;
});
static::deleting(function ($model) {
$model->shouldInvalidateCache = true;
});
@ -197,6 +204,69 @@ class CustomFilter extends Model
Cache::forget("filters:v3:{$this->profile_id}");
}
/**
 * Maximum number of characters of a status that are scanned for keyword matches.
 *
 * Read once from `instance.custom_filters.max_content_scan_limit` (default 2500)
 * and memoized in a static property. The value is cast to int explicitly because
 * env-backed config values arrive as strings and the backing property is `?int`.
 *
 * @return int
 */
public static function getMaxContentScanLimit(): int
{
    return self::$maxContentScanLimit ??= (int) config('instance.custom_filters.max_content_scan_limit', 2500);
}
/**
 * Maximum number of filters a single user may have.
 *
 * Read once from `instance.custom_filters.max_filters_per_user` (default 20)
 * and memoized in a static property. The value is cast to int explicitly because
 * env-backed config values arrive as strings and the backing property is `?int`.
 *
 * @return int
 */
public static function getMaxFiltersPerUser(): int
{
    return self::$maxFiltersPerUser ??= (int) config('instance.custom_filters.max_filters_per_user', 20);
}
/**
 * Maximum number of keywords that may be attached to one filter.
 *
 * Read once from `instance.custom_filters.max_keywords_per_filter` (default 10)
 * and memoized in a static property. The value is cast to int explicitly because
 * env-backed config values arrive as strings and the backing property is `?int`.
 *
 * @return int
 */
public static function getMaxKeywordsPerFilter(): int
{
    return self::$maxKeywordsPerFilter ??= (int) config('instance.custom_filters.max_keywords_per_filter', 10);
}
/**
 * Maximum length allowed for a single filter keyword.
 *
 * Read once from `instance.custom_filters.max_keyword_length` (default 40)
 * and memoized in the static `$maxKeywordsLength` property. The value is cast to
 * int explicitly because env-backed config values arrive as strings and the
 * backing property is `?int`.
 *
 * @return int
 */
public static function getMaxKeywordLength(): int
{
    return self::$maxKeywordsLength ??= (int) config('instance.custom_filters.max_keyword_length', 40);
}
/**
 * Maximum length of the combined regex pattern built from a filter's keywords.
 *
 * Read once from `instance.custom_filters.max_pattern_length` (default 10000)
 * and memoized in a static property. The value is cast to int explicitly because
 * env-backed config values arrive as strings and the backing property is `?int`.
 *
 * @return int
 */
public static function getMaxPatternLength(): int
{
    return self::$maxPatternLength ??= (int) config('instance.custom_filters.max_pattern_length', 10000);
}
/**
 * Maximum number of filter creations allowed per hour.
 *
 * Read once from `instance.custom_filters.max_create_per_hour` (default 20)
 * and memoized in a static property. The value is cast to int explicitly because
 * env-backed config values arrive as strings and the backing property is `?int`.
 *
 * @return int
 */
public static function getMaxCreatePerHour(): int
{
    return self::$maxCreatePerHour ??= (int) config('instance.custom_filters.max_create_per_hour', 20);
}
/**
 * Maximum number of filter updates allowed per hour.
 *
 * Read once from `instance.custom_filters.max_updates_per_hour` (default 40)
 * and memoized in a static property. The value is cast to int explicitly because
 * env-backed config values arrive as strings and the backing property is `?int`.
 *
 * @return int
 */
public static function getMaxUpdatesPerHour(): int
{
    return self::$maxUpdatesPerHour ??= (int) config('instance.custom_filters.max_updates_per_hour', 40);
}
/**
* Get cached filters for an account with simplified, secure approach
*
@ -219,7 +289,7 @@ class CustomFilter extends Model
return;
}
$maxPatternsPerFilter = self::MAX_KEYWORDS_PER_FILTER;
// Cap the keywords compiled into this filter's regex with the per-filter keyword
// limit (the configurable successor of MAX_KEYWORDS_PER_FILTER), not the per-user filter limit.
$maxPatternsPerFilter = self::getMaxKeywordsPerFilter();
$keywordsToProcess = $keywords->take($maxPatternsPerFilter);
$regexPatterns = $keywordsToProcess->map(function ($keyword) {
@ -237,7 +307,7 @@ class CustomFilter extends Model
}
$combinedPattern = implode('|', $regexPatterns);
$maxPatternLength = self::MAX_KEYWORD_LEN;
$maxPatternLength = self::getMaxPatternLength();
if (strlen($combinedPattern) > $maxPatternLength) {
$combinedPattern = substr($combinedPattern, 0, $maxPatternLength);
}
@ -248,24 +318,24 @@ class CustomFilter extends Model
];
});
$statusFilters = CustomFilterStatus::with(['customFilter' => function ($query) use ($profileId) {
$query->unexpired()->where('profile_id', $profileId);
}])->get();
// $statusFilters = CustomFilterStatus::with(['customFilter' => function ($query) use ($profileId) {
// $query->unexpired()->where('profile_id', $profileId);
// }])->get();
$statusFilters->groupBy('custom_filter_id')->each(function ($statuses, $filterId) use (&$filtersHash) {
$filter = $statuses->first()->customFilter;
// $statusFilters->groupBy('custom_filter_id')->each(function ($statuses, $filterId) use (&$filtersHash) {
// $filter = $statuses->first()->customFilter;
if (! $filter) {
return;
}
// if (! $filter) {
// return;
// }
if (! isset($filtersHash[$filterId])) {
$filtersHash[$filterId] = ['filter' => $filter];
}
// if (! isset($filtersHash[$filterId])) {
// $filtersHash[$filterId] = ['filter' => $filter];
// }
$maxStatusIds = self::MAX_STATUSES_PER_FILTER;
$filtersHash[$filterId]['status_ids'] = $statuses->take($maxStatusIds)->pluck('status_id')->toArray();
});
// $maxStatusIds = self::MAX_STATUSES_PER_FILTER;
// $filtersHash[$filterId]['status_ids'] = $statuses->take($maxStatusIds)->pluck('status_id')->toArray();
// });
return array_map(function ($item) {
$filter = $item['filter'];
@ -300,7 +370,7 @@ class CustomFilter extends Model
if (isset($rules['keywords'])) {
$text = strip_tags($status['content']);
$maxContentLength = self::MAX_CONTENT_SCAN_LEN;
$maxContentLength = self::getMaxContentScanLimit();
if (mb_strlen($text) > $maxContentLength) {
$text = mb_substr($text, 0, $maxContentLength);
}
@ -308,7 +378,7 @@ class CustomFilter extends Model
try {
preg_match_all($rules['keywords'], $text, $matches, PREG_PATTERN_ORDER, 0);
if (! empty($matches[0])) {
$maxReportedMatches = 10;
$maxReportedMatches = (int) config('instance.custom_filters.max_reported_matches', 10);
$keywordMatches = array_slice($matches[0], 0, $maxReportedMatches);
}
} catch (\Throwable $e) {
@ -318,15 +388,15 @@ class CustomFilter extends Model
}
}
if (isset($rules['status_ids'])) {
$statusId = $status->id;
$reblogId = $status->reblog_of_id ?? null;
// if (isset($rules['status_ids'])) {
// $statusId = $status->id;
// $reblogId = $status->reblog_of_id ?? null;
$matchingIds = array_intersect($rules['status_ids'], array_filter([$statusId, $reblogId]));
if (! empty($matchingIds)) {
$statusMatches = $matchingIds;
}
}
// $matchingIds = array_intersect($rules['status_ids'], array_filter([$statusId, $reblogId]));
// if (! empty($matchingIds)) {
// $statusMatches = $matchingIds;
// }
// }
if (! empty($keywordMatches) || ! empty($statusMatches)) {
$results[] = [

@ -190,4 +190,78 @@ return [
'allow_new_account_dms' => env('INSTANCE_ALLOW_NEW_DMS', true),
'total_count_estimate' => env('INSTANCE_TOTAL_POSTS_COUNT_ESTIMATE', false),
'custom_filters' => [
    /*
     * NOTE: every limit below is cast to int. Values read from the environment
     * arrive as strings, and the CustomFilter model stores them in int-typed state.
     */

    /*
     * The maximum number of characters from a status that will be scanned
     * for filter matching. Scanning too many characters can hurt performance,
     * so this limit ensures that only the most relevant portion of a status is processed.
     *
     * For remote statuses, you might want to increase this value if you expect
     * important content to appear later in long posts.
     */
    'max_content_scan_limit' => (int) env('PF_CF_CONTENT_SCAN_LIMIT', 2500),

    /*
     * The maximum number of filters a single user can create.
     * Limiting the number of filters per user helps prevent abuse and
     * ensures that the filtering system remains performant.
     */
    'max_filters_per_user' => (int) env('PF_CF_MAX_FILTERS_PER_USER', 20),

    /*
     * The maximum number of keywords that can be associated with a single filter.
     * This limit helps control the complexity of the generated regular expressions
     * and protects against potential performance issues during content scanning.
     */
    'max_keywords_per_filter' => (int) env('PF_CF_MAX_KEYWORDS_PER_FILTER', 10),

    /*
     * The maximum length allowed for each keyword in a filter.
     * Limiting keyword length not only curtails the size of the regex patterns created,
     * but also guards against potential abuse where excessively long keywords might
     * negatively impact matching performance or lead to unintended behavior.
     */
    'max_keyword_length' => (int) env('PF_CF_MAX_KEYWORD_LENGTH', 40),

    /*
     * The maximum allowed length for the combined regex pattern.
     * When constructing a regex that matches multiple filter keywords, each keyword
     * (after escaping and adding boundaries) contributes to the total pattern length.
     *
     * This value is set to 10000 by default. If you increase either the number of keywords
     * per filter or the maximum length allowed for each keyword, consider increasing this
     * limit accordingly so that the final regex pattern can accommodate the additional length
     * without being truncated or causing performance issues.
     */
    'max_pattern_length' => (int) env('PF_CF_MAX_PATTERN_LENGTH', 10000),

    /*
     * The maximum number of keyword matches to report for a given status.
     * When a filter is applied to a status, the matching process may find multiple occurrences
     * of a keyword. This value limits the number of matches that are reported back,
     * which helps manage output volume and processing overhead.
     *
     * The default is set to 10, but you can adjust this value through your environment configuration.
     */
    'max_reported_matches' => (int) env('PF_CF_MAX_REPORTED_MATCHES', 10),

    /*
     * The maximum number of filter creation operations allowed per hour for a non-admin user.
     * This rate limit prevents abuse by restricting how many filters a normal user can create
     * within one hour. Admin users are exempt from this limit.
     *
     * Default is 20 creations per hour.
     */
    'max_create_per_hour' => (int) env('PF_CF_MAX_CREATE_PER_HOUR', 20),

    /*
     * The maximum number of filter update operations allowed per hour for a non-admin user.
     * This rate limit is designed to prevent abuse by limiting how many times a normal user
     * can update their filters within one hour. Admin users are not subject to these limits.
     *
     * Default is 40 updates per hour.
     */
    'max_updates_per_hour' => (int) env('PF_CF_MAX_UPDATES_PER_HOUR', 40),
],
];

Loading…
Cancel
Save