From 02851848e964675bb59919fa5fd1bdee2c1c29db Mon Sep 17 00:00:00 2001 From: Claire Date: Tue, 28 Jun 2022 09:42:13 +0200 Subject: Revamp post filtering system (#18058) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add model for custom filter keywords * Use CustomFilterKeyword internally Does not change the API * Fix /filters/edit and /filters/new * Add migration tests * Remove whole_word column from custom_filters (covered by custom_filter_keywords) * Redesign /filters Instead of a list, present a card that displays more information and handles multiple keywords per filter. * Redesign /filters/new and /filters/edit to add and remove keywords This adds a new gem dependency: cocoon, as well as a npm dependency: cocoon-js-vanilla. Those are used to easily populate and remove form fields from the user interface when manipulating multiple keyword filters at once. * Add /api/v2/filters to edit filter with multiple keywords Entities: - `Filter`: `id`, `title`, `filter_action` (either `hide` or `warn`), `context`, `keywords` - `FilterKeyword`: `id`, `keyword`, `whole_word` API endpoints: - `GET /api/v2/filters` to list filters (including keywords) - `POST /api/v2/filters` to create a new filter `keywords_attributes` can also be passed to create keywords in one request - `GET /api/v2/filters/:id` to read a particular filter - `PUT /api/v2/filters/:id` to update a new filter `keywords_attributes` can also be passed to edit, delete or add keywords in one request - `DELETE /api/v2/filters/:id` to delete a particular filter - `GET /api/v2/filters/:id/keywords` to list keywords for a filter - `POST /api/v2/filters/:filter_id/keywords/:id` to add a new keyword to a filter - `GET /api/v2/filter_keywords/:id` to read a particular keyword - `PUT /api/v2/filter_keywords/:id` to edit a particular keyword - `DELETE /api/v2/filter_keywords/:id` to delete a particular keyword * Change from `irreversible` boolean to `action` enum * Remove 
irrelevant `irreversible_must_be_within_context` check * Fix /filters/new and /filters/edit with update for filter_action * Fix Rubocop/Codeclimate complaining about task names * Refactor FeedManager#phrase_filtered? This moves regexp building and filter caching to the `CustomFilter` class. This does not change the functional behavior yet, but this changes how the cache is built, doing per-custom_filter regexps so that filters can be matched independently, while still offering caching. * Perform server-side filtering and output result in REST API * Fix numerous filters_changed events being sent when editing multiple keywords at once * Add some tests * Use the new API in the WebUI - use client-side logic for filters we have fetched rules for. This is so that filter changes can be retroactively applied without reloading the UI. - use server-side logic for filters we haven't fetched rules for yet (e.g. network error, or initial timeline loading) * Minor optimizations and refactoring * Perform server-side filtering on the streaming server * Change the wording of filter action labels * Fix issues pointed out by linter * Change design of “Show anyway” link in accordance to review comments * Drop “irreversible” filtering behavior * Move /api/v2/filter_keywords to /api/v1/filters/keywords * Rename `filter_results` attribute to `filtered` * Rename REST::LegacyFilterSerializer to REST::V1::FilterSerializer * Fix systemChannelId value in streaming server * Simplify code by removing client-side filtering code The simplification comes at a cost though: filters aren't retroactively applied anymore. 
--- streaming/index.js | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 4 deletions(-) (limited to 'streaming') diff --git a/streaming/index.js b/streaming/index.js index 6935c4764..792ec5a44 100644 --- a/streaming/index.js +++ b/streaming/index.js @@ -12,6 +12,7 @@ const url = require('url'); const uuid = require('uuid'); const fs = require('fs'); const WebSocket = require('ws'); +const { JSDOM } = require('jsdom'); const env = process.env.NODE_ENV || 'development'; const alwaysRequireAuth = process.env.LIMITED_FEDERATION_MODE === 'true' || process.env.WHITELIST_MODE === 'true' || process.env.AUTHORIZED_FETCH === 'true'; @@ -503,6 +504,9 @@ const startWorker = async (workerId) => { if (event === 'kill') { log.verbose(req.requestId, `Closing connection for ${req.accountId} due to expired access token`); eventHandlers.onKill(); + } else if (event === 'filters_changed') { + log.verbose(req.requestId, `Invalidating filters cache for ${req.accountId}`); + req.cachedFilters = null; } }; }; @@ -512,7 +516,8 @@ const startWorker = async (workerId) => { * @param {any} res */ const subscribeHttpToSystemChannel = (req, res) => { - const systemChannelId = `timeline:access_token:${req.accessTokenId}`; + const accessTokenChannelId = `timeline:access_token:${req.accessTokenId}`; + const systemChannelId = `timeline:system:${req.accountId}`; const listener = createSystemMessageListener(req, { @@ -523,9 +528,11 @@ const startWorker = async (workerId) => { }); res.on('close', () => { + unsubscribe(`${redisPrefix}${accessTokenChannelId}`, listener); unsubscribe(`${redisPrefix}${systemChannelId}`, listener); }); + subscribe(`${redisPrefix}${accessTokenChannelId}`, listener); subscribe(`${redisPrefix}${systemChannelId}`, listener); }; @@ -674,17 +681,84 @@ const startWorker = async (workerId) => { queries.push(client.query('SELECT 1 FROM account_domain_blocks WHERE account_id = $1 AND domain = $2', [req.accountId, accountDomain])); } + if 
(!unpackedPayload.filter_results && !req.cachedFilters) { + queries.push(client.query('SELECT filter.id AS id, filter.phrase AS title, filter.context AS context, filter.expires_at AS expires_at, filter.action AS filter_action, keyword.keyword AS keyword, keyword.whole_word AS whole_word FROM custom_filter_keywords keyword JOIN custom_filters filter ON keyword.custom_filter_id = filter.id WHERE filter.account_id = $1 AND filter.expires_at IS NULL OR filter.expires_at > NOW()', [req.accountId])); + } + Promise.all(queries).then(values => { done(); - if (values[0].rows.length > 0 || (values.length > 1 && values[1].rows.length > 0)) { + if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) { return; } + if (!unpackedPayload.filter_results && !req.cachedFilters) { + const filterRows = values[accountDomain ? 2 : 1].rows; + + req.cachedFilters = filterRows.reduce((cache, row) => { + if (cache[row.id]) { + cache[row.id].keywords.push([row.keyword, row.whole_word]); + } else { + cache[row.id] = { + keywords: [[row.keyword, row.whole_word]], + expires_at: row.expires_at, + repr: { + id: row.id, + title: row.title, + context: row.context, + expires_at: row.expires_at, + filter_action: row.filter_action, + }, + }; + } + + return cache; + }, {}); + + Object.keys(req.cachedFilters).forEach((key) => { + req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => { + let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');; + + if (whole_word) { + if (/^[\w]/.test(expr)) { + expr = `\\b${expr}`; + } + + if (/[\w]$/.test(expr)) { + expr = `${expr}\\b`; + } + } + + return expr; + }).join('|'), 'i'); + }); + } + + // Check filters + if (req.cachedFilters && !unpackedPayload.filter_results) { + const status = unpackedPayload; + const searchContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? 
status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n'); + const searchIndex = JSDOM.fragment(searchContent).textContent; + + const now = new Date(); + payload.filter_results = []; + Object.values(req.cachedFilters).forEach((cachedFilter) => { + if ((cachedFilter.expires_at === null || cachedFilter.expires_at > now)) { + const keyword_matches = searchIndex.match(cachedFilter.regexp); + if (keyword_matches) { + payload.filter_results.push({ + filter: cachedFilter.repr, + keyword_matches, + }); + } + } + }); + } + transmit(); }).catch(err => { - done(); log.error(err); + done(); }); }); }; @@ -1009,7 +1083,8 @@ const startWorker = async (workerId) => { * @param {WebSocketSession} session */ const subscribeWebsocketToSystemChannel = ({ socket, request, subscriptions }) => { - const systemChannelId = `timeline:access_token:${request.accessTokenId}`; + const accessTokenChannelId = `timeline:access_token:${request.accessTokenId}`; + const systemChannelId = `timeline:system:${request.accountId}`; const listener = createSystemMessageListener(request, { @@ -1019,8 +1094,15 @@ const startWorker = async (workerId) => { }); + subscribe(`${redisPrefix}${accessTokenChannelId}`, listener); subscribe(`${redisPrefix}${systemChannelId}`, listener); + subscriptions[accessTokenChannelId] = { + listener, + stopHeartbeat: () => { + }, + }; + subscriptions[systemChannelId] = { listener, stopHeartbeat: () => { -- cgit