about summary refs log tree commit diff
path: root/streaming
diff options
context:
space:
mode:
authorClaire <claire.github-309c@sitedethib.com>2022-06-28 09:42:13 +0200
committerGitHub <noreply@github.com>2022-06-28 09:42:13 +0200
commit02851848e964675bb59919fa5fd1bdee2c1c29db (patch)
tree55f0836a4dda9f8de3a9c9511d59e5b726fd77a7 /streaming
parent5823ae70c4c7c297c8d69ecd0be8df65019411e3 (diff)
Revamp post filtering system (#18058)
* Add model for custom filter keywords

* Use CustomFilterKeyword internally

Does not change the API

* Fix /filters/edit and /filters/new

* Add migration tests

* Remove whole_word column from custom_filters (covered by custom_filter_keywords)

* Redesign /filters

Instead of a list, present a card that displays more information and handles
multiple keywords per filter.

* Redesign /filters/new and /filters/edit to add and remove keywords

This adds a new gem dependency: cocoon, as well as a npm dependency:
cocoon-js-vanilla. Those are used to easily populate and remove form fields
from the user interface when manipulating multiple keyword filters at once.

* Add /api/v2/filters to edit filter with multiple keywords

Entities:
- `Filter`: `id`, `title`, `filter_action` (either `hide` or `warn`), `context`,
  `keywords`
- `FilterKeyword`: `id`, `keyword`, `whole_word`

API endpoints:
- `GET /api/v2/filters` to list filters (including keywords)
- `POST /api/v2/filters` to create a new filter
  `keywords_attributes` can also be passed to create keywords in one request
- `GET /api/v2/filters/:id` to read a particular filter
- `PUT /api/v2/filters/:id` to update an existing filter
  `keywords_attributes` can also be passed to edit, delete or add keywords in
   one request
- `DELETE /api/v2/filters/:id` to delete a particular filter
- `GET /api/v2/filters/:id/keywords` to list keywords for a filter
- `POST /api/v2/filters/:filter_id/keywords/:id` to add a new keyword to a
   filter
- `GET /api/v2/filter_keywords/:id` to read a particular keyword
- `PUT /api/v2/filter_keywords/:id` to edit a particular keyword
- `DELETE /api/v2/filter_keywords/:id` to delete a particular keyword

* Change from `irreversible` boolean to `action` enum

* Remove irrelevant `irreversible_must_be_within_context` check

* Fix /filters/new and /filters/edit with update for filter_action

* Fix Rubocop/Codeclimate complaining about task names

* Refactor FeedManager#phrase_filtered?

This moves regexp building and filter caching to the `CustomFilter` class.

This does not change the functional behavior yet, but this changes how the
cache is built, doing per-custom_filter regexps so that filters can be matched
independently, while still offering caching.

* Perform server-side filtering and output result in REST API

* Fix numerous filters_changed events being sent when editing multiple keywords at once

* Add some tests

* Use the new API in the WebUI

- use client-side logic for filters we have fetched rules for.
  This is so that filter changes can be retroactively applied without
  reloading the UI.
- use server-side logic for filters we haven't fetched rules for yet
  (e.g. network error, or initial timeline loading)

* Minor optimizations and refactoring

* Perform server-side filtering on the streaming server

* Change the wording of filter action labels

* Fix issues pointed out by linter

* Change design of “Show anyway” link in accordance with review comments

* Drop “irreversible” filtering behavior

* Move /api/v2/filter_keywords to /api/v1/filters/keywords

* Rename `filter_results` attribute to `filtered`

* Rename REST::LegacyFilterSerializer to REST::V1::FilterSerializer

* Fix systemChannelId value in streaming server

* Simplify code by removing client-side filtering code

The simplification comes at a cost though: filters aren't retroactively
applied anymore.
Diffstat (limited to 'streaming')
-rw-r--r--streaming/index.js90
1 files changed, 86 insertions, 4 deletions
diff --git a/streaming/index.js b/streaming/index.js
index 6935c4764..792ec5a44 100644
--- a/streaming/index.js
+++ b/streaming/index.js
@@ -12,6 +12,7 @@ const url = require('url');
 const uuid = require('uuid');
 const fs = require('fs');
 const WebSocket = require('ws');
+const { JSDOM } = require('jsdom');
 
 const env = process.env.NODE_ENV || 'development';
 const alwaysRequireAuth = process.env.LIMITED_FEDERATION_MODE === 'true' || process.env.WHITELIST_MODE === 'true' || process.env.AUTHORIZED_FETCH === 'true';
@@ -503,6 +504,9 @@ const startWorker = async (workerId) => {
       if (event === 'kill') {
         log.verbose(req.requestId, `Closing connection for ${req.accountId} due to expired access token`);
         eventHandlers.onKill();
+      } else if (event === 'filters_changed') {
+        log.verbose(req.requestId, `Invalidating filters cache for ${req.accountId}`);
+        req.cachedFilters = null;
       }
     };
   };
@@ -512,7 +516,8 @@ const startWorker = async (workerId) => {
    * @param {any} res
    */
   const subscribeHttpToSystemChannel = (req, res) => {
-    const systemChannelId = `timeline:access_token:${req.accessTokenId}`;
+    const accessTokenChannelId = `timeline:access_token:${req.accessTokenId}`;
+    const systemChannelId = `timeline:system:${req.accountId}`;
 
     const listener = createSystemMessageListener(req, {
 
@@ -523,9 +528,11 @@ const startWorker = async (workerId) => {
     });
 
     res.on('close', () => {
+      unsubscribe(`${redisPrefix}${accessTokenChannelId}`, listener);
       unsubscribe(`${redisPrefix}${systemChannelId}`, listener);
     });
 
+    subscribe(`${redisPrefix}${accessTokenChannelId}`, listener);
     subscribe(`${redisPrefix}${systemChannelId}`, listener);
   };
 
@@ -674,17 +681,84 @@ const startWorker = async (workerId) => {
           queries.push(client.query('SELECT 1 FROM account_domain_blocks WHERE account_id = $1 AND domain = $2', [req.accountId, accountDomain]));
         }
 
+        if (!unpackedPayload.filter_results && !req.cachedFilters) {
+          queries.push(client.query('SELECT filter.id AS id, filter.phrase AS title, filter.context AS context, filter.expires_at AS expires_at, filter.action AS filter_action, keyword.keyword AS keyword, keyword.whole_word AS whole_word FROM custom_filter_keywords keyword JOIN custom_filters filter ON keyword.custom_filter_id = filter.id WHERE filter.account_id = $1 AND filter.expires_at IS NULL OR filter.expires_at > NOW()', [req.accountId]));
+        }
+
         Promise.all(queries).then(values => {
           done();
 
-          if (values[0].rows.length > 0 || (values.length > 1 && values[1].rows.length > 0)) {
+          if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) {
             return;
           }
 
+          if (!unpackedPayload.filter_results && !req.cachedFilters) {
+            const filterRows = values[accountDomain ? 2 : 1].rows;
+
+            req.cachedFilters = filterRows.reduce((cache, row) => {
+              if (cache[row.id]) {
+                cache[row.id].keywords.push([row.keyword, row.whole_word]);
+              } else {
+                cache[row.id] = {
+                  keywords: [[row.keyword, row.whole_word]],
+                  expires_at: row.expires_at,
+                  repr: {
+                    id: row.id,
+                    title: row.title,
+                    context: row.context,
+                    expires_at: row.expires_at,
+                    filter_action: row.filter_action,
+                  },
+                };
+              }
+
+              return cache;
+            }, {});
+
+            Object.keys(req.cachedFilters).forEach((key) => {
+              req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => {
+                let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');;
+
+                if (whole_word) {
+                  if (/^[\w]/.test(expr)) {
+                    expr = `\\b${expr}`;
+                  }
+
+                  if (/[\w]$/.test(expr)) {
+                    expr = `${expr}\\b`;
+                  }
+                }
+
+                return expr;
+              }).join('|'), 'i');
+            });
+          }
+
+          // Check filters
+          if (req.cachedFilters && !unpackedPayload.filter_results) {
+            const status = unpackedPayload;
+            const searchContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n');
+            const searchIndex = JSDOM.fragment(searchContent).textContent;
+
+            const now = new Date();
+            payload.filter_results = [];
+            Object.values(req.cachedFilters).forEach((cachedFilter) => {
+              if ((cachedFilter.expires_at === null || cachedFilter.expires_at > now)) {
+                const keyword_matches = searchIndex.match(cachedFilter.regexp);
+                if (keyword_matches) {
+                  payload.filter_results.push({
+                    filter: cachedFilter.repr,
+                    keyword_matches,
+                  });
+                }
+              }
+            });
+          }
+
           transmit();
         }).catch(err => {
-          done();
           log.error(err);
+          done();
         });
       });
     };
@@ -1009,7 +1083,8 @@ const startWorker = async (workerId) => {
    * @param {WebSocketSession} session
    */
   const subscribeWebsocketToSystemChannel = ({ socket, request, subscriptions }) => {
-    const systemChannelId = `timeline:access_token:${request.accessTokenId}`;
+    const accessTokenChannelId = `timeline:access_token:${request.accessTokenId}`;
+    const systemChannelId = `timeline:system:${request.accountId}`;
 
     const listener = createSystemMessageListener(request, {
 
@@ -1019,8 +1094,15 @@ const startWorker = async (workerId) => {
 
     });
 
+    subscribe(`${redisPrefix}${accessTokenChannelId}`, listener);
     subscribe(`${redisPrefix}${systemChannelId}`, listener);
 
+    subscriptions[accessTokenChannelId] = {
+      listener,
+      stopHeartbeat: () => {
+      },
+    };
+
     subscriptions[systemChannelId] = {
       listener,
       stopHeartbeat: () => {