about summary refs log tree commit diff
diff options
context:
space:
mode:
authorClaire <claire.github-309c@sitedethib.com>2022-02-08 19:43:59 +0100
committerGitHub <noreply@github.com>2022-02-08 19:43:59 +0100
commit2fd1db7c9d0fe8c1cca159b9b0818c72e7c318aa (patch)
treebc7319ae242a889bb1d05b7afdd365d78a43ac1d
parentb1983623aec8e0b066d115736d2151e0c74407fa (diff)
parent692963d43beb5e66a86e15d63b5aa3eeca82f0a1 (diff)
Merge pull request #1680 from ClearlyClaire/glitch-soc/merge-upstream
Merge upstream changes
-rw-r--r--Gemfile2
-rw-r--r--Gemfile.lock5
-rw-r--r--app/controllers/activitypub/replies_controller.rb32
-rw-r--r--app/controllers/concerns/localized.rb29
-rw-r--r--app/helpers/languages_helper.rb291
-rw-r--r--app/helpers/settings_helper.rb2
-rw-r--r--app/lib/activitypub/activity/create.rb6
-rw-r--r--app/lib/language_detector.rb101
-rw-r--r--app/lib/link_details_extractor.rb62
-rw-r--r--app/models/account_suggestions/global_source.rb2
-rw-r--r--app/models/user.rb4
-rw-r--r--app/services/activitypub/process_status_update_service.rb6
-rw-r--r--app/services/post_status_service.rb7
-rw-r--r--app/validators/import_validator.rb2
-rw-r--r--app/views/admin/follow_recommendations/show.html.haml2
-rw-r--r--app/views/settings/preferences/other/show.html.haml4
-rw-r--r--app/workers/activitypub/processing_worker.rb5
-rw-r--r--app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb2
-rw-r--r--app/workers/scheduler/follow_recommendations_scheduler.rb29
-rw-r--r--config/application.rb13
-rw-r--r--config/locales/en.yml2
-rw-r--r--lib/tasks/repo.rake5
-rw-r--r--spec/controllers/activitypub/replies_controller_spec.rb293
-rw-r--r--spec/helpers/languages_helper_spec.rb6
-rw-r--r--spec/lib/language_detector_spec.rb134
-rw-r--r--spec/lib/link_details_extractor_spec.rb122
26 files changed, 601 insertions, 567 deletions
diff --git a/Gemfile b/Gemfile
index 1ddb6f383..7713bfb2b 100644
--- a/Gemfile
+++ b/Gemfile
@@ -29,9 +29,7 @@ gem 'addressable', '~> 2.8'
 gem 'bootsnap', '~> 1.10.2', require: false
 gem 'browser'
 gem 'charlock_holmes', '~> 0.7.7'
-gem 'iso-639'
 gem 'chewy', '~> 7.2'
-gem 'cld3', '~> 3.4.4'
 gem 'devise', '~> 4.8'
 gem 'devise-two-factor', '~> 4.0'
 
diff --git a/Gemfile.lock b/Gemfile.lock
index 18459a159..2baa12038 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -152,8 +152,6 @@ GEM
       elasticsearch (>= 7.12.0, < 7.14.0)
       elasticsearch-dsl
     chunky_png (1.4.0)
-    cld3 (3.4.4)
-      ffi (>= 1.1.0, < 1.16.0)
     climate_control (0.2.0)
     coderay (1.1.3)
     color_diff (0.1)
@@ -303,7 +301,6 @@ GEM
       terminal-table (>= 1.5.1)
     idn-ruby (0.1.4)
     ipaddress (0.8.3)
-    iso-639 (0.3.5)
     jmespath (1.5.0)
     json (2.5.1)
     json-canonicalization (0.3.0)
@@ -701,7 +698,6 @@ DEPENDENCIES
   capybara (~> 3.36)
   charlock_holmes (~> 0.7.7)
   chewy (~> 7.2)
-  cld3 (~> 3.4.4)
   climate_control (~> 0.2)
   color_diff (~> 0.1)
   concurrent-ruby
@@ -729,7 +725,6 @@ DEPENDENCIES
   httplog (~> 1.5.0)
   i18n-tasks (~> 0.9)
   idn-ruby
-  iso-639
   json-ld
   json-ld-preloaded (~> 3.2)
   kaminari (~> 1.2)
diff --git a/app/controllers/activitypub/replies_controller.rb b/app/controllers/activitypub/replies_controller.rb
index fde6c861f..4ff7cfa08 100644
--- a/app/controllers/activitypub/replies_controller.rb
+++ b/app/controllers/activitypub/replies_controller.rb
@@ -63,15 +63,29 @@ class ActivityPub::RepliesController < ActivityPub::BaseController
   end
 
   def next_page
-    only_other_accounts = !(@replies&.last&.account_id == @account.id && @replies.size == DESCENDANTS_LIMIT)
-
-    account_status_replies_url(
-      @account,
-      @status,
-      page: true,
-      min_id: only_other_accounts && !only_other_accounts? ? nil : @replies&.last&.id,
-      only_other_accounts: only_other_accounts
-    )
+    if only_other_accounts?
+      # Only consider remote accounts
+      return nil if @replies.size < DESCENDANTS_LIMIT
+
+      account_status_replies_url(
+        @account,
+        @status,
+        page: true,
+        min_id: @replies&.last&.id,
+        only_other_accounts: true
+      )
+    else
+      # For now, we're serving only self-replies, but next page might be other accounts
+      next_only_other_accounts = @replies&.last&.account_id != @account.id || @replies.size < DESCENDANTS_LIMIT
+
+      account_status_replies_url(
+        @account,
+        @status,
+        page: true,
+        min_id: next_only_other_accounts ? nil : @replies&.last&.id,
+        only_other_accounts: next_only_other_accounts
+      )
+    end
   end
 
   def page_params
diff --git a/app/controllers/concerns/localized.rb b/app/controllers/concerns/localized.rb
index fe1142f34..f7b62f09c 100644
--- a/app/controllers/concerns/localized.rb
+++ b/app/controllers/concerns/localized.rb
@@ -7,27 +7,24 @@ module Localized
     around_action :set_locale
   end
 
-  def set_locale
-    locale   = current_user.locale if respond_to?(:user_signed_in?) && user_signed_in?
-    locale ||= session[:locale] ||= default_locale
-    locale   = default_locale unless I18n.available_locales.include?(locale.to_sym)
-
-    I18n.with_locale(locale) do
-      yield
-    end
+  def set_locale(&block)
+    I18n.with_locale(requested_locale || I18n.default_locale, &block)
   end
 
   private
 
-  def default_locale
-    if ENV['DEFAULT_LOCALE'].present?
-      I18n.default_locale
-    else
-      request_locale || I18n.default_locale
-    end
+  def requested_locale
+    requested_locale_name   = available_locale_or_nil(params[:locale])
+    requested_locale_name ||= available_locale_or_nil(current_user.locale) if respond_to?(:user_signed_in?) && user_signed_in?
+    requested_locale_name ||= http_accept_language if ENV['DEFAULT_LOCALE'].blank?
+    requested_locale_name
+  end
+
+  def http_accept_language
+    HttpAcceptLanguage::Parser.new(request.headers.fetch('Accept-Language')).language_region_compatible_from(I18n.available_locales) if request.headers.key?('Accept-Language')
   end
 
-  def request_locale
-    http_accept_language.language_region_compatible_from(I18n.available_locales)
+  def available_locale_or_nil(locale_name)
+    locale_name.to_sym if locale_name.present? && I18n.available_locales.include?(locale_name.to_sym)
   end
 end
diff --git a/app/helpers/languages_helper.rb b/app/helpers/languages_helper.rb
index 730724208..f3ed7b314 100644
--- a/app/helpers/languages_helper.rb
+++ b/app/helpers/languages_helper.rb
@@ -1,94 +1,237 @@
 # frozen_string_literal: true
 
 module LanguagesHelper
-  HUMAN_LOCALES = {
-    af: 'Afrikaans',
-    ar: 'العربية',
-    ast: 'Asturianu',
-    bg: 'Български',
-    bn: 'বাংলা',
-    br: 'Breton',
-    ca: 'Català',
-    co: 'Corsu',
-    cs: 'Čeština',
-    cy: 'Cymraeg',
-    da: 'Dansk',
-    de: 'Deutsch',
-    el: 'Ελληνικά',
-    en: 'English',
-    eo: 'Esperanto',
+  ISO_639_1 = {
+    aa: ['Afar', 'Afaraf'].freeze,
+    ab: ['Abkhaz', 'аҧсуа бызшәа'].freeze,
+    ae: ['Avestan', 'avesta'].freeze,
+    af: ['Afrikaans', 'Afrikaans'].freeze,
+    ak: ['Akan', 'Akan'].freeze,
+    am: ['Amharic', 'አማርኛ'].freeze,
+    an: ['Aragonese', 'aragonés'].freeze,
+    ar: ['Arabic', 'اللغة العربية'].freeze,
+    as: ['Assamese', 'অসমীয়া'].freeze,
+    av: ['Avaric', 'авар мацӀ'].freeze,
+    ay: ['Aymara', 'aymar aru'].freeze,
+    az: ['Azerbaijani', 'azərbaycan dili'].freeze,
+    ba: ['Bashkir', 'башҡорт теле'].freeze,
+    be: ['Belarusian', 'беларуская мова'].freeze,
+    bg: ['Bulgarian', 'български език'].freeze,
+    bh: ['Bihari', 'भोजपुरी'].freeze,
+    bi: ['Bislama', 'Bislama'].freeze,
+    bm: ['Bambara', 'bamanankan'].freeze,
+    bn: ['Bengali', 'বাংলা'].freeze,
+    bo: ['Tibetan', 'བོད་ཡིག'].freeze,
+    br: ['Breton', 'brezhoneg'].freeze,
+    bs: ['Bosnian', 'bosanski jezik'].freeze,
+    ca: ['Catalan', 'Català'].freeze,
+    ce: ['Chechen', 'нохчийн мотт'].freeze,
+    ch: ['Chamorro', 'Chamoru'].freeze,
+    co: ['Corsican', 'corsu'].freeze,
+    cr: ['Cree', 'ᓀᐦᐃᔭᐍᐏᐣ'].freeze,
+    cs: ['Czech', 'čeština'].freeze,
+    cu: ['Old Church Slavonic', 'ѩзыкъ словѣньскъ'].freeze,
+    cv: ['Chuvash', 'чӑваш чӗлхи'].freeze,
+    cy: ['Welsh', 'Cymraeg'].freeze,
+    da: ['Danish', 'dansk'].freeze,
+    de: ['German', 'Deutsch'].freeze,
+    dv: ['Divehi', 'Dhivehi'].freeze,
+    dz: ['Dzongkha', 'རྫོང་ཁ'].freeze,
+    ee: ['Ewe', 'Eʋegbe'].freeze,
+    el: ['Greek', 'Ελληνικά'].freeze,
+    en: ['English', 'English'].freeze,
+    eo: ['Esperanto', 'Esperanto'].freeze,
+    es: ['Spanish', 'Español'].freeze,
+    et: ['Estonian', 'eesti'].freeze,
+    eu: ['Basque', 'euskara'].freeze,
+    fa: ['Persian', 'فارسی'].freeze,
+    ff: ['Fula', 'Fulfulde'].freeze,
+    fi: ['Finnish', 'suomi'].freeze,
+    fj: ['Fijian', 'Vakaviti'].freeze,
+    fo: ['Faroese', 'føroyskt'].freeze,
+    fr: ['French', 'Français'].freeze,
+    fy: ['Western Frisian', 'Frysk'].freeze,
+    ga: ['Irish', 'Gaeilge'].freeze,
+    gd: ['Scottish Gaelic', 'Gàidhlig'].freeze,
+    gl: ['Galician', 'galego'].freeze,
+    gu: ['Gujarati', 'ગુજરાતી'].freeze,
+    gv: ['Manx', 'Gaelg'].freeze,
+    ha: ['Hausa', 'هَوُسَ'].freeze,
+    he: ['Hebrew', 'עברית'].freeze,
+    hi: ['Hindi', 'हिन्दी'].freeze,
+    ho: ['Hiri Motu', 'Hiri Motu'].freeze,
+    hr: ['Croatian', 'Hrvatski'].freeze,
+    ht: ['Haitian', 'Kreyòl ayisyen'].freeze,
+    hu: ['Hungarian', 'magyar'].freeze,
+    hy: ['Armenian', 'Հայերեն'].freeze,
+    hz: ['Herero', 'Otjiherero'].freeze,
+    ia: ['Interlingua', 'Interlingua'].freeze,
+    id: ['Indonesian', 'Bahasa Indonesia'].freeze,
+    ie: ['Interlingue', 'Interlingue'].freeze,
+    ig: ['Igbo', 'Asụsụ Igbo'].freeze,
+    ii: ['Nuosu', 'ꆈꌠ꒿ Nuosuhxop'].freeze,
+    ik: ['Inupiaq', 'Iñupiaq'].freeze,
+    io: ['Ido', 'Ido'].freeze,
+    is: ['Icelandic', 'Íslenska'].freeze,
+    it: ['Italian', 'Italiano'].freeze,
+    iu: ['Inuktitut', 'ᐃᓄᒃᑎᑐᑦ'].freeze,
+    ja: ['Japanese', '日本語'].freeze,
+    jv: ['Javanese', 'basa Jawa'].freeze,
+    ka: ['Georgian', 'ქართული'].freeze,
+    kg: ['Kongo', 'Kikongo'].freeze,
+    ki: ['Kikuyu', 'Gĩkũyũ'].freeze,
+    kj: ['Kwanyama', 'Kuanyama'].freeze,
+    kk: ['Kazakh', 'қазақ тілі'].freeze,
+    kl: ['Kalaallisut', 'kalaallisut'].freeze,
+    km: ['Khmer', 'ខេមរភាសា'].freeze,
+    kn: ['Kannada', 'ಕನ್ನಡ'].freeze,
+    ko: ['Korean', '한국어'].freeze,
+    kr: ['Kanuri', 'Kanuri'].freeze,
+    ks: ['Kashmiri', 'कश्मीरी'].freeze,
+    ku: ['Kurdish', 'Kurdî'].freeze,
+    kv: ['Komi', 'коми кыв'].freeze,
+    kw: ['Cornish', 'Kernewek'].freeze,
+    ky: ['Kyrgyz', 'Кыргызча'].freeze,
+    la: ['Latin', 'latine'].freeze,
+    lb: ['Luxembourgish', 'Lëtzebuergesch'].freeze,
+    lg: ['Ganda', 'Luganda'].freeze,
+    li: ['Limburgish', 'Limburgs'].freeze,
+    ln: ['Lingala', 'Lingála'].freeze,
+    lo: ['Lao', 'ພາສາ'].freeze,
+    lt: ['Lithuanian', 'lietuvių kalba'].freeze,
+    lu: ['Luba-Katanga', 'Tshiluba'].freeze,
+    lv: ['Latvian', 'latviešu valoda'].freeze,
+    mg: ['Malagasy', 'fiteny malagasy'].freeze,
+    mh: ['Marshallese', 'Kajin M̧ajeļ'].freeze,
+    mi: ['Māori', 'te reo Māori'].freeze,
+    mk: ['Macedonian', 'македонски јазик'].freeze,
+    ml: ['Malayalam', 'മലയാളം'].freeze,
+    mn: ['Mongolian', 'Монгол хэл'].freeze,
+    mr: ['Marathi', 'मराठी'].freeze,
+    ms: ['Malay', 'Bahasa Malaysia'].freeze,
+    mt: ['Maltese', 'Malti'].freeze,
+    my: ['Burmese', 'ဗမာစာ'].freeze,
+    na: ['Nauru', 'Ekakairũ Naoero'].freeze,
+    nb: ['Norwegian Bokmål', 'Norsk bokmål'].freeze,
+    nd: ['Northern Ndebele', 'isiNdebele'].freeze,
+    ne: ['Nepali', 'नेपाली'].freeze,
+    ng: ['Ndonga', 'Owambo'].freeze,
+    nl: ['Dutch', 'Nederlands'].freeze,
+    nn: ['Norwegian Nynorsk', 'Norsk nynorsk'].freeze,
+    no: ['Norwegian', 'Norsk'].freeze,
+    nr: ['Southern Ndebele', 'isiNdebele'].freeze,
+    nv: ['Navajo', 'Diné bizaad'].freeze,
+    ny: ['Chichewa', 'chiCheŵa'].freeze,
+    oc: ['Occitan', 'occitan'].freeze,
+    oj: ['Ojibwe', 'ᐊᓂᔑᓈᐯᒧᐎᓐ'].freeze,
+    om: ['Oromo', 'Afaan Oromoo'].freeze,
+    or: ['Oriya', 'ଓଡ଼ିଆ'].freeze,
+    os: ['Ossetian', 'ирон æвзаг'].freeze,
+    pa: ['Panjabi', 'ਪੰਜਾਬੀ'].freeze,
+    pi: ['Pāli', 'पाऴि'].freeze,
+    pl: ['Polish', 'Polski'].freeze,
+    ps: ['Pashto', 'پښتو'].freeze,
+    pt: ['Portuguese', 'Português'].freeze,
+    qu: ['Quechua', 'Runa Simi'].freeze,
+    rm: ['Romansh', 'rumantsch grischun'].freeze,
+    rn: ['Kirundi', 'Ikirundi'].freeze,
+    ro: ['Romanian', 'Română'].freeze,
+    ru: ['Russian', 'Русский'].freeze,
+    rw: ['Kinyarwanda', 'Ikinyarwanda'].freeze,
+    sa: ['Sanskrit', 'संस्कृतम्'].freeze,
+    sc: ['Sardinian', 'sardu'].freeze,
+    sd: ['Sindhi', 'सिन्धी'].freeze,
+    se: ['Northern Sami', 'Davvisámegiella'].freeze,
+    sg: ['Sango', 'yângâ tî sängö'].freeze,
+    si: ['Sinhala', 'සිංහල'].freeze,
+    sk: ['Slovak', 'slovenčina'].freeze,
+    sl: ['Slovenian', 'slovenščina'].freeze,
+    sn: ['Shona', 'chiShona'].freeze,
+    so: ['Somali', 'Soomaaliga'].freeze,
+    sq: ['Albanian', 'Shqip'].freeze,
+    sr: ['Serbian', 'српски језик'].freeze,
+    ss: ['Swati', 'SiSwati'].freeze,
+    st: ['Southern Sotho', 'Sesotho'].freeze,
+    su: ['Sundanese', 'Basa Sunda'].freeze,
+    sv: ['Swedish', 'Svenska'].freeze,
+    sw: ['Swahili', 'Kiswahili'].freeze,
+    ta: ['Tamil', 'தமிழ்'].freeze,
+    te: ['Telugu', 'తెలుగు'].freeze,
+    tg: ['Tajik', 'тоҷикӣ'].freeze,
+    th: ['Thai', 'ไทย'].freeze,
+    ti: ['Tigrinya', 'ትግርኛ'].freeze,
+    tk: ['Turkmen', 'Türkmen'].freeze,
+    tl: ['Tagalog', 'Wikang Tagalog'].freeze,
+    tn: ['Tswana', 'Setswana'].freeze,
+    to: ['Tonga', 'faka Tonga'].freeze,
+    tr: ['Turkish', 'Türkçe'].freeze,
+    ts: ['Tsonga', 'Xitsonga'].freeze,
+    tt: ['Tatar', 'татар теле'].freeze,
+    tw: ['Twi', 'Twi'].freeze,
+    ty: ['Tahitian', 'Reo Tahiti'].freeze,
+    ug: ['Uyghur', 'ئۇيغۇرچە‎'].freeze,
+    uk: ['Ukrainian', 'Українська'].freeze,
+    ur: ['Urdu', 'اردو'].freeze,
+    uz: ['Uzbek', 'Ўзбек'].freeze,
+    ve: ['Venda', 'Tshivenḓa'].freeze,
+    vi: ['Vietnamese', 'Tiếng Việt'].freeze,
+    vo: ['Volapük', 'Volapük'].freeze,
+    wa: ['Walloon', 'walon'].freeze,
+    wo: ['Wolof', 'Wollof'].freeze,
+    xh: ['Xhosa', 'isiXhosa'].freeze,
+    yi: ['Yiddish', 'ייִדיש'].freeze,
+    yo: ['Yoruba', 'Yorùbá'].freeze,
+    za: ['Zhuang', 'Saɯ cueŋƅ'].freeze,
+    zh: ['Chinese', '中文'].freeze,
+    zu: ['Zulu', 'isiZulu'].freeze,
+  }.freeze
+
+  ISO_639_3 = {
+    ast: ['Asturian', 'Asturianu'].freeze,
+    kab: ['Kabyle', 'Taqbaylit'].freeze,
+    kmr: ['Northern Kurdish', 'Kurmancî'].freeze,
+    zgh: ['Standard Moroccan Tamazight', 'ⵜⴰⵎⴰⵣⵉⵖⵜ'].freeze,
+  }.freeze
+
+  SUPPORTED_LOCALES = {}.merge(ISO_639_1).merge(ISO_639_3).freeze
+
+  # For ISO-639-1 and ISO-639-3 language codes, we have their official
+  # names, but for some translations, we need the names of the
+  # regional variants specifically
+  REGIONAL_LOCALE_NAMES = {
     'es-AR': 'Español (Argentina)',
     'es-MX': 'Español (México)',
-    es: 'Español',
-    et: 'Eesti',
-    eu: 'Euskara',
-    fa: 'فارسی',
-    fi: 'Suomi',
-    fr: 'Français',
-    ga: 'Gaeilge',
-    gd: 'Gàidhlig',
-    gl: 'Galego',
-    he: 'עברית',
-    hi: 'हिन्दी',
-    hr: 'Hrvatski',
-    hu: 'Magyar',
-    hy: 'Հայերեն',
-    id: 'Bahasa Indonesia',
-    io: 'Ido',
-    is: 'Íslenska',
-    it: 'Italiano',
-    ja: '日本語',
-    ka: 'ქართული',
-    kab: 'Taqbaylit',
-    kk: 'Қазақша',
-    kmr: 'Kurmancî',
-    kn: 'ಕನ್ನಡ',
-    ko: '한국어',
-    ku: 'سۆرانی',
-    lt: 'Lietuvių',
-    lv: 'Latviešu',
-    mk: 'Македонски',
-    ml: 'മലയാളം',
-    mr: 'मराठी',
-    ms: 'Bahasa Melayu',
-    nl: 'Nederlands',
-    nn: 'Nynorsk',
-    no: 'Norsk',
-    oc: 'Occitan',
-    pl: 'Polski',
     'pt-BR': 'Português (Brasil)',
     'pt-PT': 'Português (Portugal)',
-    pt: 'Português',
-    ro: 'Română',
-    ru: 'Русский',
-    sa: 'संस्कृतम्',
-    sc: 'Sardu',
-    si: 'සිංහල',
-    sk: 'Slovenčina',
-    sl: 'Slovenščina',
-    sq: 'Shqip',
     'sr-Latn': 'Srpski (latinica)',
-    sr: 'Српски',
-    sv: 'Svenska',
-    ta: 'தமிழ்',
-    te: 'తెలుగు',
-    th: 'ไทย',
-    tr: 'Türkçe',
-    uk: 'Українська',
-    ur: 'اُردُو',
-    vi: 'Tiếng Việt',
-    zgh: 'ⵜⴰⵎⴰⵣⵉⵖⵜ',
     'zh-CN': '简体中文',
     'zh-HK': '繁體中文(香港)',
     'zh-TW': '繁體中文(臺灣)',
-    zh: '中文',
   }.freeze
 
   def human_locale(locale)
     if locale == 'und'
       I18n.t('generic.none')
+    elsif (supported_locale = SUPPORTED_LOCALES[locale.to_sym])
+      supported_locale[1]
+    elsif (regional_locale = REGIONAL_LOCALE_NAMES[locale.to_sym])
+      regional_locale
     else
-      HUMAN_LOCALES[locale.to_sym] || locale
+      locale
     end
   end
+
+  def valid_locale_or_nil(str)
+    return if str.blank?
+
+    code, = str.to_s.split(/[_-]/) # Strip out the region from e.g. en_US or ja-JP
+
+    return unless valid_locale?(code)
+
+    code
+  end
+
+  def valid_locale?(locale)
+    SUPPORTED_LOCALES.key?(locale.to_sym)
+  end
 end
diff --git a/app/helpers/settings_helper.rb b/app/helpers/settings_helper.rb
index 23739d1cd..3d5592867 100644
--- a/app/helpers/settings_helper.rb
+++ b/app/helpers/settings_helper.rb
@@ -2,7 +2,7 @@
 
 module SettingsHelper
   def filterable_languages
-    LanguageDetector.instance.language_names.select(&LanguagesHelper::HUMAN_LOCALES.method(:key?))
+    LanguagesHelper::SUPPORTED_LOCALES.keys
   end
 
   def hash_to_object(hash)
diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb
index ad273c20b..cf31b6ff6 100644
--- a/app/lib/activitypub/activity/create.rb
+++ b/app/lib/activitypub/activity/create.rb
@@ -112,7 +112,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
         url: @status_parser.url || @status_parser.uri,
         account: @account,
         text: converted_object_type? ? converted_text : (@status_parser.text || ''),
-        language: @status_parser.language || detected_language,
+        language: @status_parser.language,
         spoiler_text: converted_object_type? ? '' : (@status_parser.spoiler_text || ''),
         created_at: @status_parser.created_at,
         edited_at: @status_parser.edited_at,
@@ -370,10 +370,6 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
     Formatter.instance.linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n"))
   end
 
-  def detected_language
-    LanguageDetector.instance.detect(@status_parser.text, @account) if supported_object_type?
-  end
-
   def unsupported_media_type?(mime_type)
     mime_type.present? && !MediaAttachment.supported_mime_types.include?(mime_type)
   end
diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb
deleted file mode 100644
index 40452eddc..000000000
--- a/app/lib/language_detector.rb
+++ /dev/null
@@ -1,101 +0,0 @@
-# frozen_string_literal: true
-
-class LanguageDetector
-  include Singleton
-
-  WORDS_THRESHOLD        = 4
-  RELIABLE_CHARACTERS_RE = /[\p{Hebrew}\p{Arabic}\p{Syriac}\p{Thaana}\p{Nko}\p{Han}\p{Katakana}\p{Hiragana}\p{Hangul}\p{Thai}]+/m
-
-  def initialize
-    @identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
-  end
-
-  def detect(text, account)
-    input_text = prepare_text(text)
-
-    return if input_text.blank?
-
-    detect_language_code(input_text) || default_locale(account)
-  end
-
-  def language_names
-    @language_names = CLD3::TaskContextParams::LANGUAGE_NAMES.map { |name| iso6391(name.to_s).to_sym }.uniq
-  end
-
-  private
-
-  def prepare_text(text)
-    simplify_text(text).strip
-  end
-
-  def unreliable_input?(text)
-    !reliable_input?(text)
-  end
-
-  def reliable_input?(text)
-    sufficient_text_length?(text) || language_specific_character_set?(text)
-  end
-
-  def sufficient_text_length?(text)
-    text.split(/\s+/).size >= WORDS_THRESHOLD
-  end
-
-  def language_specific_character_set?(text)
-    words = text.scan(RELIABLE_CHARACTERS_RE)
-
-    if words.present?
-      words.reduce(0) { |acc, elem| acc + elem.size }.to_f / text.size > 0.3
-    else
-      false
-    end
-  end
-
-  def detect_language_code(text)
-    return if unreliable_input?(text)
-
-    result = @identifier.find_language(text)
-
-    iso6391(result.language.to_s).to_sym if result&.reliable?
-  end
-
-  def iso6391(bcp47)
-    iso639 = bcp47.split('-').first
-
-    # CLD3 returns grandfathered language code for Hebrew
-    return 'he' if iso639 == 'iw'
-
-    ISO_639.find(iso639).alpha2
-  end
-
-  def simplify_text(text)
-    new_text = remove_html(text)
-    new_text.gsub!(FetchLinkCardService::URL_PATTERN, '\1')
-    new_text.gsub!(Account::MENTION_RE, '')
-    new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase }
-    new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '')
-    new_text.gsub!(/\s+/, ' ')
-    new_text
-  end
-
-  def new_scrubber
-    scrubber = Rails::Html::PermitScrubber.new
-    scrubber.tags = %w(br p)
-    scrubber
-  end
-
-  def scrubber
-    @scrubber ||= new_scrubber
-  end
-
-  def remove_html(text)
-    text = Loofah.fragment(text).scrub!(scrubber).to_s
-    text.gsub!('<br>', "\n")
-    text.gsub!('</p><p>', "\n\n")
-    text.gsub!(/(^<p>|<\/p>$)/, '')
-    text
-  end
-
-  def default_locale(account)
-    account.user_locale&.to_sym || I18n.default_locale if account.local?
-  end
-end
diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb
index 56ad0717b..fabbd244d 100644
--- a/app/lib/link_details_extractor.rb
+++ b/app/lib/link_details_extractor.rb
@@ -2,6 +2,20 @@
 
 class LinkDetailsExtractor
   include ActionView::Helpers::TagHelper
+  include LanguagesHelper
+
+  # Some publications wrap their JSON-LD data in their <script> tags
+  # in commented-out CDATA blocks, they need to be removed before
+  # attempting to parse JSON
+  CDATA_JUNK_PATTERN = %r{^[\s]*(
+    (/\*[\s]*<!\[CDATA\[[\s]*\*/) # Block comment style opening
+    |
+    (//[\s]*<!\[CDATA\[) # Single-line comment style opening
+    |
+    (/\*[\s]*\]\]>[\s]*\*/) # Block comment style closing
+    |
+    (//[\s]*\]\]>) # Single-line comment style closing
+  )[\s]*$}x
 
   class StructuredData
     SUPPORTED_TYPES = %w(
@@ -61,6 +75,10 @@ class LinkDetailsExtractor
       publisher.dig('logo', 'url')
     end
 
+    def valid?
+      json.present?
+    end
+
     private
 
     def author
@@ -134,11 +152,11 @@ class LinkDetailsExtractor
   end
 
   def title
-    structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first
+    html_entities.decode(structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first)
   end
 
   def description
-    structured_data&.description || opengraph_tag('og:description') || meta_tag('description')
+    html_entities.decode(structured_data&.description || opengraph_tag('og:description') || meta_tag('description'))
   end
 
   def image
@@ -146,11 +164,11 @@ class LinkDetailsExtractor
   end
 
   def canonical_url
-    valid_url_or_nil(opengraph_tag('og:url') || link_tag('canonical'), same_origin_only: true) || @original_url.to_s
+    valid_url_or_nil(link_tag('canonical') || opengraph_tag('og:url'), same_origin_only: true) || @original_url.to_s
   end
 
   def provider_name
-    structured_data&.publisher_name || opengraph_tag('og:site_name')
+    html_entities.decode(structured_data&.publisher_name || opengraph_tag('og:site_name'))
   end
 
   def provider_url
@@ -158,7 +176,7 @@ class LinkDetailsExtractor
   end
 
   def author_name
-    structured_data&.author_name || opengraph_tag('og:author') || opengraph_tag('og:author:username')
+    html_entities.decode(structured_data&.author_name || opengraph_tag('og:author') || opengraph_tag('og:author:username'))
   end
 
   def author_url
@@ -201,14 +219,6 @@ class LinkDetailsExtractor
     nil
   end
 
-  def valid_locale_or_nil(str)
-    return nil if str.blank?
-
-    code,  = str.split(/_-/) # Strip out the region from e.g. en_US or ja-JA
-    locale = ISO_639.find(code)
-    locale&.alpha2
-  end
-
   def link_tag(name)
     document.xpath("//link[@rel=\"#{name}\"]").map { |link| link['href'] }.first
   end
@@ -223,10 +233,24 @@ class LinkDetailsExtractor
 
   def structured_data
     @structured_data ||= begin
-      json_ld = document.xpath('//script[@type="application/ld+json"]').map(&:content).first
-      json_ld.present? ? StructuredData.new(json_ld) : nil
-    rescue Oj::ParseError
-      nil
+      # Some publications have more than one JSON-LD definition on the page,
+      # and some of those definitions aren't valid JSON either, so we have
+      # to loop through here until we find something that is the right type
+      # and doesn't break
+      document.xpath('//script[@type="application/ld+json"]').filter_map do |element|
+        json_ld = element.content&.gsub(CDATA_JUNK_PATTERN, '')
+
+        next if json_ld.blank?
+
+        structured_data = StructuredData.new(html_entities.decode(json_ld))
+
+        next unless structured_data.valid?
+
+        structured_data
+      rescue Oj::ParseError, EncodingError
+        Rails.logger.debug("Invalid JSON-LD in #{@original_url}")
+        next
+      end.first
     end
   end
 
@@ -246,4 +270,8 @@ class LinkDetailsExtractor
       detector.strip_tags = true
     end
   end
+
+  def html_entities
+    @html_entities ||= HTMLEntities.new
+  end
 end
diff --git a/app/models/account_suggestions/global_source.rb b/app/models/account_suggestions/global_source.rb
index ac764de50..03ed1b6c2 100644
--- a/app/models/account_suggestions/global_source.rb
+++ b/app/models/account_suggestions/global_source.rb
@@ -6,7 +6,7 @@ class AccountSuggestions::GlobalSource < AccountSuggestions::Source
   end
 
   def get(account, skip_account_ids: [], limit: 40)
-    account_ids = account_ids_for_locale(account.user_locale) - [account.id] - skip_account_ids
+    account_ids = account_ids_for_locale(I18n.locale.to_str.split(/[_-]/).first) - [account.id] - skip_account_ids
 
     as_ordered_suggestions(
       scope(account).where(id: account_ids),
diff --git a/app/models/user.rb b/app/models/user.rb
index 9afdc481d..ee20e293e 100644
--- a/app/models/user.rb
+++ b/app/models/user.rb
@@ -245,6 +245,10 @@ class User < ApplicationRecord
     save!
   end
 
+  def preferred_posting_language
+    settings.default_language || locale
+  end
+
   def setting_default_privacy
     settings.default_privacy || (account.locked? ? 'private' : 'public')
   end
diff --git a/app/services/activitypub/process_status_update_service.rb b/app/services/activitypub/process_status_update_service.rb
index 977928127..b1cea1cdf 100644
--- a/app/services/activitypub/process_status_update_service.rb
+++ b/app/services/activitypub/process_status_update_service.rb
@@ -120,7 +120,7 @@ class ActivityPub::ProcessStatusUpdateService < BaseService
     @status.text         = @status_parser.text || ''
     @status.spoiler_text = @status_parser.spoiler_text || ''
     @status.sensitive    = @account.sensitized? || @status_parser.sensitive || false
-    @status.language     = @status_parser.language || detected_language
+    @status.language     = @status_parser.language
     @status.edited_at    = @status_parser.edited_at || Time.now.utc if significant_changes?
 
     @status.save!
@@ -210,10 +210,6 @@ class ActivityPub::ProcessStatusUpdateService < BaseService
     { redis: Redis.current, key: "create:#{@uri}", autorelease: 15.minutes.seconds }
   end
 
-  def detected_language
-    LanguageDetector.instance.detect(@status_parser.text, @account)
-  end
-
   def create_previous_edit!
     # We only need to create a previous edit when no previous edits exist, e.g.
     # when the status has never been edited. For other cases, we always create
diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb
index 9d26e0f5b..c5061dd63 100644
--- a/app/services/post_status_service.rb
+++ b/app/services/post_status_service.rb
@@ -2,6 +2,7 @@
 
 class PostStatusService < BaseService
   include Redisable
+  include LanguagesHelper
 
   MIN_SCHEDULE_OFFSET = 5.minutes.freeze
 
@@ -118,10 +119,6 @@ class PostStatusService < BaseService
     raise Mastodon::ValidationError, I18n.t('media_attachments.validations.not_ready') if @media.any?(&:not_processed?)
   end
 
-  def language_from_option(str)
-    ISO_639.find(str)&.alpha2
-  end
-
   def process_mentions_service
     ProcessMentionsService.new
   end
@@ -174,7 +171,7 @@ class PostStatusService < BaseService
       sensitive: @sensitive,
       spoiler_text: @options[:spoiler_text] || '',
       visibility: @visibility,
-      language: language_from_option(@options[:language]) || @account.user&.setting_default_language&.presence || LanguageDetector.instance.detect(@text, @account),
+      language: valid_locale_or_nil(@options[:language].presence || @account.user&.preferred_posting_language || I18n.default_locale),
       application: @options[:application],
       content_type: @options[:content_type] || @account.user&.setting_default_content_type,
       rate_limit: @options[:with_rate_limit],
diff --git a/app/validators/import_validator.rb b/app/validators/import_validator.rb
index a182abfa5..9f19aee2a 100644
--- a/app/validators/import_validator.rb
+++ b/app/validators/import_validator.rb
@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+require 'csv'
+
 class ImportValidator < ActiveModel::Validator
   KNOWN_HEADERS = [
     'Account address',
diff --git a/app/views/admin/follow_recommendations/show.html.haml b/app/views/admin/follow_recommendations/show.html.haml
index 2878f07d7..d1c160bd2 100644
--- a/app/views/admin/follow_recommendations/show.html.haml
+++ b/app/views/admin/follow_recommendations/show.html.haml
@@ -10,7 +10,7 @@
     .filter-subset.filter-subset--with-select
       %strong= t('admin.follow_recommendations.language')
       .input.select.optional
-        = select_tag :language, options_for_select(I18n.available_locales.map { |key| [human_locale(key), key]}, @language)
+        = select_tag :language, options_for_select(I18n.available_locales.map { |key| key.to_s.split(/[_-]/).first.to_sym }.uniq.map { |key| [human_locale(key), key]}, @language)
 
     .filter-subset
       %strong= t('admin.follow_recommendations.status')
diff --git a/app/views/settings/preferences/other/show.html.haml b/app/views/settings/preferences/other/show.html.haml
index 3b5c7016d..76ff2bcbc 100644
--- a/app/views/settings/preferences/other/show.html.haml
+++ b/app/views/settings/preferences/other/show.html.haml
@@ -27,7 +27,7 @@
       = f.input :setting_default_privacy, collection: Status.selectable_visibilities, wrapper: :with_label, include_blank: false, label_method: lambda { |visibility| safe_join([I18n.t("statuses.visibilities.#{visibility}"), I18n.t("statuses.visibilities.#{visibility}_long")], ' - ') }, required: false, hint: false
 
     .fields-group.fields-row__column.fields-row__column-6
-      = f.input :setting_default_language, collection: [nil] + filterable_languages.sort, wrapper: :with_label, label_method: lambda { |locale| locale.nil? ? I18n.t('statuses.language_detection') : human_locale(locale) }, required: false, include_blank: false, hint: false
+      = f.input :setting_default_language, collection: [nil] + filterable_languages, wrapper: :with_label, label_method: lambda { |locale| locale.nil? ? I18n.t('statuses.default_language') : human_locale(locale) }, required: false, include_blank: false, hint: false
 
   .fields-group
     = f.input :setting_default_sensitive, as: :boolean, wrapper: :with_label
@@ -41,7 +41,7 @@
   %h4= t 'preferences.public_timelines'
 
   .fields-group
-    = f.input :chosen_languages, collection: filterable_languages.sort, wrapper: :with_block_label, include_blank: false, label_method: lambda { |locale| human_locale(locale) }, required: false, as: :check_boxes, collection_wrapper_tag: 'ul', item_wrapper_tag: 'li'
+    = f.input :chosen_languages, collection: filterable_languages, wrapper: :with_block_label, include_blank: false, label_method: lambda { |locale| human_locale(locale) }, required: false, as: :check_boxes, collection_wrapper_tag: 'ul', item_wrapper_tag: 'li'
 
   .actions
     = f.button :button, t('generic.save_changes'), type: :submit
diff --git a/app/workers/activitypub/processing_worker.rb b/app/workers/activitypub/processing_worker.rb
index cef595319..37e316354 100644
--- a/app/workers/activitypub/processing_worker.rb
+++ b/app/workers/activitypub/processing_worker.rb
@@ -6,7 +6,10 @@ class ActivityPub::ProcessingWorker
   sidekiq_options backtrace: true, retry: 8
 
   def perform(account_id, body, delivered_to_account_id = nil)
-    ActivityPub::ProcessCollectionService.new.call(body, Account.find(account_id), override_timestamps: true, delivered_to_account_id: delivered_to_account_id, delivery: true)
+    account = Account.find_by(id: account_id)
+    return if account.nil?
+
+    ActivityPub::ProcessCollectionService.new.call(body, account, override_timestamps: true, delivered_to_account_id: delivered_to_account_id, delivery: true)
   rescue ActiveRecord::RecordInvalid => e
     Rails.logger.debug "Error processing incoming ActivityPub object: #{e}"
   end
diff --git a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
index f42d4bca6..7195f0ff9 100644
--- a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
+++ b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
@@ -66,7 +66,7 @@ class Scheduler::AccountsStatusesCleanupScheduler
   end
 
   def compute_budget
-    threads = Sidekiq::ProcessSet.new.filter { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum
+    threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum
     [PER_THREAD_BUDGET * threads, MAX_BUDGET].min
   end
 
diff --git a/app/workers/scheduler/follow_recommendations_scheduler.rb b/app/workers/scheduler/follow_recommendations_scheduler.rb
index effc63e59..084619cbd 100644
--- a/app/workers/scheduler/follow_recommendations_scheduler.rb
+++ b/app/workers/scheduler/follow_recommendations_scheduler.rb
@@ -16,28 +16,33 @@ class Scheduler::FollowRecommendationsScheduler
     AccountSummary.refresh
     FollowRecommendation.refresh
 
-    fallback_recommendations = FollowRecommendation.order(rank: :desc).limit(SET_SIZE).index_by(&:account_id)
+    fallback_recommendations = FollowRecommendation.order(rank: :desc).limit(SET_SIZE)
 
-    I18n.available_locales.each do |locale|
+    I18n.available_locales.map { |locale| locale.to_s.split(/[_-]/).first }.uniq.each do |locale|
       recommendations = begin
         if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist
-          FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).index_by(&:account_id)
+          FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).map { |recommendation| [recommendation.account_id, recommendation.rank] }
         else
-          {}
+          []
         end
       end
 
       # Use language-agnostic results if there are not enough language-specific ones
-      missing = SET_SIZE - recommendations.keys.size
+      missing = SET_SIZE - recommendations.size
+
+      if missing.positive? && fallback_recommendations.size.positive?
+        max_fallback_rank = fallback_recommendations.first.rank || 0
+
+        # Language-specific results should be above language-agnostic ones,
+        # otherwise language-agnostic ones will always overshadow them
+        recommendations.map! { |(account_id, rank)| [account_id, rank + max_fallback_rank] }
 
-      if missing.positive?
         added = 0
 
-        # Avoid duplicate results
-        fallback_recommendations.each_value do |recommendation|
-          next if recommendations.key?(recommendation.account_id)
+        fallback_recommendations.each do |recommendation|
+          next if recommendations.any? { |(account_id, _)| account_id == recommendation.account_id }
 
-          recommendations[recommendation.account_id] = recommendation
+          recommendations << [recommendation.account_id, recommendation.rank]
           added += 1
 
           break if added >= missing
@@ -47,8 +52,8 @@ class Scheduler::FollowRecommendationsScheduler
       redis.pipelined do
         redis.del(key(locale))
 
-        recommendations.each_value do |recommendation|
-          redis.zadd(key(locale), recommendation.rank, recommendation.account_id)
+        recommendations.each do |(account_id, rank)|
+          redis.zadd(key(locale), rank, account_id)
         end
       end
     end
diff --git a/config/application.rb b/config/application.rb
index 561722884..c6f775162 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -149,10 +149,14 @@ module Mastodon
       :'zh-TW',
     ]
 
-    config.i18n.default_locale = ENV['DEFAULT_LOCALE']&.to_sym
-
-    unless config.i18n.available_locales.include?(config.i18n.default_locale)
-      config.i18n.default_locale = :en
+    config.i18n.default_locale = begin
+      custom_default_locale = ENV['DEFAULT_LOCALE']&.to_sym
+
+      if config.i18n.available_locales.include?(custom_default_locale)
+        custom_default_locale
+      else
+        :en
+      end
     end
 
     # config.paths.add File.join('app', 'api'), glob: File.join('**', '*.rb')
@@ -169,7 +173,6 @@ module Mastodon
       Doorkeeper::Application.send :include, ApplicationExtension
       Doorkeeper::AccessToken.send :include, AccessTokenExtension
       Devise::FailureApp.send :include, AbstractController::Callbacks
-      Devise::FailureApp.send :include, HttpAcceptLanguage::EasyAccess
       Devise::FailureApp.send :include, Localized
     end
   end
diff --git a/config/locales/en.yml b/config/locales/en.yml
index 85aa87c7a..600090e78 100644
--- a/config/locales/en.yml
+++ b/config/locales/en.yml
@@ -1307,13 +1307,13 @@ en:
         other: "%{count} videos"
     boosted_from_html: Boosted from %{acct_link}
     content_warning: 'Content warning: %{warning}'
+    default_language: Same as interface language
     disallowed_hashtags:
       one: 'contained a disallowed hashtag: %{tags}'
       other: 'contained the disallowed hashtags: %{tags}'
     edited_at: Edited %{date}
     errors:
       in_reply_not_found: The post you are trying to reply to does not appear to exist.
-    language_detection: Automatically detect language
     open_in_web: Open in web
     over_character_limit: character limit of %{max} exceeded
     pin_errors:
diff --git a/lib/tasks/repo.rake b/lib/tasks/repo.rake
index bbf7f20ee..795b54c59 100644
--- a/lib/tasks/repo.rake
+++ b/lib/tasks/repo.rake
@@ -96,7 +96,8 @@ namespace :repo do
     end.uniq.compact
 
     missing_available_locales = locales_in_files - I18n.available_locales
-    missing_locale_names = I18n.available_locales.reject { |locale| LanguagesHelper::HUMAN_LOCALES.key?(locale) }
+    supported_locale_codes    = Set.new(LanguagesHelper::SUPPORTED_LOCALES.keys + LanguagesHelper::REGIONAL_LOCALE_NAMES.keys)
+    missing_locale_names      = I18n.available_locales.reject { |locale| supported_locale_codes.include?(locale) }
 
     critical = false
 
@@ -123,7 +124,7 @@ namespace :repo do
 
     unless missing_locale_names.empty?
       puts pastel.yellow("You are missing human-readable names for these locales: #{pastel.bold(missing_locale_names.join(', '))}")
-      puts pastel.yellow("Add them to #{pastel.bold('HUMAN_LOCALES')} in app/helpers/settings_helper.rb or remove the locales from #{pastel.bold('I18n.available_locales')} in config/application.rb")
+      puts pastel.yellow("Add them to app/helpers/languages_helper.rb or remove the locales from #{pastel.bold('I18n.available_locales')} in config/application.rb")
     end
 
     if critical
diff --git a/spec/controllers/activitypub/replies_controller_spec.rb b/spec/controllers/activitypub/replies_controller_spec.rb
index bf82fd020..a2c7f336f 100644
--- a/spec/controllers/activitypub/replies_controller_spec.rb
+++ b/spec/controllers/activitypub/replies_controller_spec.rb
@@ -4,8 +4,9 @@ require 'rails_helper'
 
 RSpec.describe ActivityPub::RepliesController, type: :controller do
   let(:status) { Fabricate(:status, visibility: parent_visibility) }
-  let(:remote_reply_id) { nil }
-  let(:remote_account) { nil }
+  let(:remote_account)  { Fabricate(:account, domain: 'foobar.com') }
+  let(:remote_reply_id) { 'https://foobar.com/statuses/1234' }
+  let(:remote_querier) { nil }
 
   shared_examples 'cachable response' do
     it 'does not set cookies' do
@@ -23,224 +24,188 @@ RSpec.describe ActivityPub::RepliesController, type: :controller do
     end
   end
 
-  before do
-    allow(controller).to receive(:signed_request_account).and_return(remote_account)
+  shared_examples 'common behavior' do
+    context 'when status is private' do
+      let(:parent_visibility) { :private }
 
-    Fabricate(:status, thread: status, visibility: :public)
-    Fabricate(:status, thread: status, visibility: :public)
-    Fabricate(:status, thread: status, visibility: :private)
-    Fabricate(:status, account: status.account, thread: status, visibility: :public)
-    Fabricate(:status, account: status.account, thread: status, visibility: :private)
-
-    Fabricate(:status, account: remote_account, thread: status, visibility: :public, uri: remote_reply_id) if remote_reply_id
-  end
+      it 'returns http not found' do
+        expect(response).to have_http_status(404)
+      end
+    end
 
-  describe 'GET #index' do
-    context 'with no signature' do
-      subject(:response) { get :index, params: { account_username: status.account.username, status_id: status.id } }
-      subject(:body) { body_as_json }
+    context 'when status is direct' do
+      let(:parent_visibility) { :direct }
 
-      context 'when account is permanently suspended' do
-        let(:parent_visibility) { :public }
+      it 'returns http not found' do
+        expect(response).to have_http_status(404)
+      end
+    end
+  end
 
-        before do
-          status.account.suspend!
-          status.account.deletion_request.destroy
-        end
+  shared_examples 'disallowed access' do
+    context 'when status is public' do
+      let(:parent_visibility) { :public }
 
-        it 'returns http gone' do
-          expect(response).to have_http_status(410)
-        end
+      it 'returns http not found' do
+        expect(response).to have_http_status(404)
       end
+    end
 
-      context 'when account is temporarily suspended' do
-        let(:parent_visibility) { :public }
+    it_behaves_like 'common behavior'
+  end
 
-        before do
-          status.account.suspend!
-        end
+  shared_examples 'allowed access' do
+    context 'when account is permanently suspended' do
+      let(:parent_visibility) { :public }
 
-        it 'returns http forbidden' do
-          expect(response).to have_http_status(403)
-        end
+      before do
+        status.account.suspend!
+        status.account.deletion_request.destroy
       end
 
-      context 'when status is public' do
-        let(:parent_visibility) { :public }
-
-        it 'returns http success' do
-          expect(response).to have_http_status(200)
-        end
+      it 'returns http gone' do
+        expect(response).to have_http_status(410)
+      end
+    end
 
-        it 'returns application/activity+json' do
-          expect(response.media_type).to eq 'application/activity+json'
-        end
+    context 'when account is temporarily suspended' do
+      let(:parent_visibility) { :public }
 
-        it_behaves_like 'cachable response'
+      before do
+        status.account.suspend!
+      end
 
-        it 'returns items with account\'s own replies' do
-          expect(body[:first]).to be_a Hash
-          expect(body[:first][:items]).to be_an Array
-          expect(body[:first][:items].size).to eq 1
-          expect(body[:first][:items].all? { |item| item[:to].include?(ActivityPub::TagManager::COLLECTIONS[:public]) || item[:cc].include?(ActivityPub::TagManager::COLLECTIONS[:public]) }).to be true
-        end
+      it 'returns http forbidden' do
+        expect(response).to have_http_status(403)
       end
+    end
 
-      context 'when status is private' do
-        let(:parent_visibility) { :private }
+    context 'when status is public' do
+      let(:parent_visibility) { :public }
+      let(:json) { body_as_json }
+      let(:page_json) { json[:first] }
 
-        it 'returns http not found' do
-          expect(response).to have_http_status(404)
-        end
+      it 'returns http success' do
+        expect(response).to have_http_status(200)
       end
 
-      context 'when status is direct' do
-        let(:parent_visibility) { :direct }
-
-        it 'returns http not found' do
-          expect(response).to have_http_status(404)
-        end
+      it 'returns application/activity+json' do
+        expect(response.media_type).to eq 'application/activity+json'
       end
-    end
 
-    context 'with signature' do
-      let(:remote_account) { Fabricate(:account, domain: 'example.com') }
-      let(:only_other_accounts) { nil }
+      it_behaves_like 'cachable response'
 
-      context do
-        before do
-          get :index, params: { account_username: status.account.username, status_id: status.id, only_other_accounts: only_other_accounts }
+      context 'without only_other_accounts' do
+        it "returns items with thread author's replies" do
+          expect(page_json).to be_a Hash
+          expect(page_json[:items]).to be_an Array
+          expect(page_json[:items].size).to eq 1
+          expect(page_json[:items].all? { |item| item[:to].include?(ActivityPub::TagManager::COLLECTIONS[:public]) || item[:cc].include?(ActivityPub::TagManager::COLLECTIONS[:public]) }).to be true
         end
 
-        context 'when status is public' do
-          let(:parent_visibility) { :public }
-
-          it 'returns http success' do
-            expect(response).to have_http_status(200)
+        context 'when there are few self-replies' do
+          it 'points next to replies from other people' do
+            expect(page_json).to be_a Hash
+            expect(Addressable::URI.parse(page_json[:next]).query.split('&')).to include('only_other_accounts=true', 'page=true')
           end
+        end
 
-          it 'returns application/activity+json' do
-            expect(response.media_type).to eq 'application/activity+json'
+        context 'when there are many self-replies' do
+          before do
+            10.times { Fabricate(:status, account: status.account, thread: status, visibility: :public) }
           end
 
-          it_behaves_like 'cachable response'
-
-          context 'without only_other_accounts' do
-            it 'returns items with account\'s own replies' do
-              json = body_as_json
-
-              expect(json[:first]).to be_a Hash
-              expect(json[:first][:items]).to be_an Array
-              expect(json[:first][:items].size).to eq 1
-              expect(json[:first][:items].all? { |item| item[:to].include?(ActivityPub::TagManager::COLLECTIONS[:public]) || item[:cc].include?(ActivityPub::TagManager::COLLECTIONS[:public]) }).to be true
-            end
+          it 'points next to other self-replies' do
+            expect(page_json).to be_a Hash
+            expect(Addressable::URI.parse(page_json[:next]).query.split('&')).to include('only_other_accounts=false', 'page=true')
           end
+        end
+      end
 
-          context 'with only_other_accounts' do
-            let(:only_other_accounts) { 'true' }
-
-            it 'returns items with other public or unlisted replies' do
-              json = body_as_json
-
-              expect(json[:first]).to be_a Hash
-              expect(json[:first][:items]).to be_an Array
-              expect(json[:first][:items].size).to eq 2
-              expect(json[:first][:items].all? { |item| item[:to].include?(ActivityPub::TagManager::COLLECTIONS[:public]) || item[:cc].include?(ActivityPub::TagManager::COLLECTIONS[:public]) }).to be true
-            end
-
-            context 'with remote responses' do
-              let(:remote_reply_id) { 'foo' }
+      context 'with only_other_accounts' do
+        let(:only_other_accounts) { 'true' }
 
-              it 'returned items are all inlined local toots or are ids' do
-                json = body_as_json
+        it 'returns items with other public or unlisted replies' do
+          expect(page_json).to be_a Hash
+          expect(page_json[:items]).to be_an Array
+          expect(page_json[:items].size).to eq 3
+        end
 
-                expect(json[:first]).to be_a Hash
-                expect(json[:first][:items]).to be_an Array
-                expect(json[:first][:items].size).to eq 3
-                expect(json[:first][:items].all? { |item| item.is_a?(Hash) ? ActivityPub::TagManager.instance.local_uri?(item[:id]) : item.is_a?(String) }).to be true
-                expect(json[:first][:items]).to include remote_reply_id
-              end
-            end
-          end
+        it 'only inlines items that are local and public or unlisted replies' do
+          inlined_replies = page_json[:items].select { |x| x.is_a?(Hash) }
+          public_collection = ActivityPub::TagManager::COLLECTIONS[:public]
+          expect(inlined_replies.all? { |item| item[:to].include?(public_collection) || item[:cc].include?(public_collection) }).to be true
+          expect(inlined_replies.all? { |item| ActivityPub::TagManager.instance.local_uri?(item[:id]) }).to be true
         end
 
-        context 'when status is private' do
-          let(:parent_visibility) { :private }
+        it 'uses ids for remote toots' do
+          remote_replies = page_json[:items].select { |x| !x.is_a?(Hash) }
+          expect(remote_replies.all? { |item| item.is_a?(String) && !ActivityPub::TagManager.instance.local_uri?(item) }).to be true
+        end
 
-          it 'returns http not found' do
-            expect(response).to have_http_status(404)
+        context 'when there are few replies' do
+          it 'does not have a next page' do
+            expect(page_json).to be_a Hash
+            expect(page_json[:next]).to be_nil
           end
         end
 
-        context 'when status is direct' do
-          let(:parent_visibility) { :direct }
+        context 'when there are many replies' do
+          before do
+            10.times { Fabricate(:status, thread: status, visibility: :public) }
+          end
 
-          it 'returns http not found' do
-            expect(response).to have_http_status(404)
+          it 'points next to other replies' do
+            expect(page_json).to be_a Hash
+            expect(Addressable::URI.parse(page_json[:next]).query.split('&')).to include('only_other_accounts=true', 'page=true')
           end
         end
       end
+    end
 
-      context 'when signed request account is blocked' do
-        before do
-          status.account.block!(remote_account)
-          get :index, params: { account_username: status.account.username, status_id: status.id }
-        end
-
-        context 'when status is public' do
-          let(:parent_visibility) { :public }
+    it_behaves_like 'common behavior'
+  end
 
-          it 'returns http not found' do
-            expect(response).to have_http_status(404)
-          end
-        end
+  before do
+    stub_const 'ActivityPub::RepliesController::DESCENDANTS_LIMIT', 5
+    allow(controller).to receive(:signed_request_account).and_return(remote_querier)
 
-        context 'when status is private' do
-          let(:parent_visibility) { :private }
+    Fabricate(:status, thread: status, visibility: :public)
+    Fabricate(:status, thread: status, visibility: :public)
+    Fabricate(:status, thread: status, visibility: :private)
+    Fabricate(:status, account: status.account, thread: status, visibility: :public)
+    Fabricate(:status, account: status.account, thread: status, visibility: :private)
 
-          it 'returns http not found' do
-            expect(response).to have_http_status(404)
-          end
-        end
+    Fabricate(:status, account: remote_account, thread: status, visibility: :public, uri: remote_reply_id)
+  end
 
-        context 'when status is direct' do
-          let(:parent_visibility) { :direct }
+  describe 'GET #index' do
+    subject(:response) { get :index, params: { account_username: status.account.username, status_id: status.id, only_other_accounts: only_other_accounts } }
+    let(:only_other_accounts) { nil }
 
-          it 'returns http not found' do
-            expect(response).to have_http_status(404)
-          end
-        end
-      end
+    context 'with no signature' do
+      it_behaves_like 'allowed access'
+    end
 
-      context 'when signed request account is domain blocked' do
-        before do
-          status.account.block_domain!(remote_account.domain)
-          get :index, params: { account_username: status.account.username, status_id: status.id }
-        end
+    context 'with signature' do
+      let(:remote_querier) { Fabricate(:account, domain: 'example.com') }
 
-        context 'when status is public' do
-          let(:parent_visibility) { :public }
+      it_behaves_like 'allowed access'
 
-          it 'returns http not found' do
-            expect(response).to have_http_status(404)
-          end
+      context 'when signed request account is blocked' do
+        before do
+          status.account.block!(remote_querier)
         end
 
-        context 'when status is private' do
-          let(:parent_visibility) { :private }
+        it_behaves_like 'disallowed access'
+      end
 
-          it 'returns http not found' do
-            expect(response).to have_http_status(404)
-          end
+      context 'when signed request account is domain blocked' do
+        before do
+          status.account.block_domain!(remote_querier.domain)
         end
 
-        context 'when status is direct' do
-          let(:parent_visibility) { :direct }
-
-          it 'returns http not found' do
-            expect(response).to have_http_status(404)
-          end
-        end
+        it_behaves_like 'disallowed access'
       end
     end
   end
diff --git a/spec/helpers/languages_helper_spec.rb b/spec/helpers/languages_helper_spec.rb
index 6db617824..b455cee33 100644
--- a/spec/helpers/languages_helper_spec.rb
+++ b/spec/helpers/languages_helper_spec.rb
@@ -3,9 +3,9 @@
 require 'rails_helper'
 
 describe LanguagesHelper do
-  describe 'the HUMAN_LOCALES constant' do
-    it 'includes all I18n locales' do
-      expect(described_class::HUMAN_LOCALES.keys).to include(*I18n.available_locales)
+  describe 'the SUPPORTED_LOCALES constant' do
+    it 'includes all i18n locales' do
+      expect(Set.new(described_class::SUPPORTED_LOCALES.keys + described_class::REGIONAL_LOCALE_NAMES.keys)).to include(*I18n.available_locales)
     end
   end
 
diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb
deleted file mode 100644
index b7ba0f6c4..000000000
--- a/spec/lib/language_detector_spec.rb
+++ /dev/null
@@ -1,134 +0,0 @@
-# frozen_string_literal: true
-
-require 'rails_helper'
-
-describe LanguageDetector do
-  describe 'prepare_text' do
-    it 'returns unmodified string without special cases' do
-      string = 'just a regular string'
-      result = described_class.instance.send(:prepare_text, string)
-
-      expect(result).to eq string
-    end
-
-    it 'collapses spacing in strings' do
-      string = 'The formatting   in    this is very        odd'
-
-      result = described_class.instance.send(:prepare_text, string)
-      expect(result).to eq 'The formatting in this is very odd'
-    end
-
-    it 'strips usernames from strings before detection' do
-      string = '@username Yeah, very surreal...! also @friend'
-
-      result = described_class.instance.send(:prepare_text, string)
-      expect(result).to eq 'Yeah, very surreal...! also'
-    end
-
-    it 'strips URLs from strings before detection' do
-      string = 'Our website is https://example.com and also http://localhost.dev'
-
-      result = described_class.instance.send(:prepare_text, string)
-      expect(result).to eq 'Our website is and also'
-    end
-
-    it 'converts #hashtags back to normal text before detection' do
-      string = 'Hey look at all the #animals and #FishAndChips'
-
-      result = described_class.instance.send(:prepare_text, string)
-      expect(result).to eq 'Hey look at all the animals and fish and chips'
-    end
-  end
-
-  describe 'detect' do
-    let(:account_without_user_locale) { Fabricate(:user, locale: nil).account }
-    let(:account_remote) { Fabricate(:account, domain: 'joinmastodon.org') }
-
-    it 'detects english language for basic strings' do
-      strings = [
-        "Hello and welcome to mastodon how are you today?",
-        "I'd rather not!",
-        "a lot of people just want to feel righteous all the time and that's all that matters",
-      ]
-      strings.each do |string|
-        result = described_class.instance.detect(string, account_without_user_locale)
-
-        expect(result).to eq(:en), string
-      end
-    end
-
-    it 'detects spanish language' do
-      string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon'
-      result = described_class.instance.detect(string, account_without_user_locale)
-
-      expect(result).to eq :es
-    end
-
-    describe 'when language can\'t be detected' do
-      it 'uses nil when sent an empty document' do
-        result = described_class.instance.detect('', account_without_user_locale)
-        expect(result).to eq nil
-      end
-
-      describe 'because of a URL' do
-        it 'uses nil when sent just a URL' do
-          string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
-          cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)
-          expect(cld_result).not_to eq :en
-
-          result = described_class.instance.detect(string, account_without_user_locale)
-
-          expect(result).to eq nil
-        end
-      end
-
-      describe 'with an account' do
-        it 'uses the account locale when present' do
-          account = double(user_locale: 'fr')
-          result  = described_class.instance.detect('', account)
-
-          expect(result).to eq nil
-        end
-
-        it 'uses nil when account is present but has no locale' do
-          result = described_class.instance.detect('', account_without_user_locale)
-
-          expect(result).to eq nil
-        end
-      end
-
-      describe 'with an `en` default locale' do
-        it 'uses nil for undetectable string' do
-          result = described_class.instance.detect('', account_without_user_locale)
-
-          expect(result).to eq nil
-        end
-      end
-
-      describe 'remote user' do
-        it 'detects Korean language' do
-          string = '안녕하세요'
-          result = described_class.instance.detect(string, account_remote)
-
-          expect(result).to eq :ko
-        end
-      end
-
-      describe 'with a non-`en` default locale' do
-        around(:each) do |example|
-          before = I18n.default_locale
-          I18n.default_locale = :ja
-          example.run
-          I18n.default_locale = before
-        end
-
-        it 'uses nil for undetectable string' do
-          string = ''
-          result = described_class.instance.detect(string, account_without_user_locale)
-
-          expect(result).to eq nil
-        end
-      end
-    end
-  end
-end
diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb
index 850857b2d..84bb4579c 100644
--- a/spec/lib/link_details_extractor_spec.rb
+++ b/spec/lib/link_details_extractor_spec.rb
@@ -26,4 +26,126 @@ RSpec.describe LinkDetailsExtractor do
       end
     end
   end
+
+  context 'when structured data is present' do
+    let(:original_url) { 'https://example.com/page.html' }
+
+    context 'and is wrapped in CDATA tags' do
+      let(:html) { <<-HTML }
+<!doctype html>
+<html>
+<head>
+  <script type="application/ld+json">
+  //<![CDATA[
+  {"@context":"http://schema.org","@type":"NewsArticle","mainEntityOfPage":"https://example.com/page.html","headline":"Foo","datePublished":"2022-01-31T19:53:00+00:00","url":"https://example.com/page.html","description":"Bar","author":{"@type":"Person","name":"Hoge"},"publisher":{"@type":"Organization","name":"Baz"}}
+  //]]>
+  </script>
+</head>
+</html>
+      HTML
+
+      describe '#title' do
+        it 'returns the title from structured data' do
+          expect(subject.title).to eq 'Foo'
+        end
+      end
+
+      describe '#description' do
+        it 'returns the description from structured data' do
+          expect(subject.description).to eq 'Bar'
+        end
+      end
+
+      describe '#provider_name' do
+        it 'returns the provider name from structured data' do
+          expect(subject.provider_name).to eq 'Baz'
+        end
+      end
+
+      describe '#author_name' do
+        it 'returns the author name from structured data' do
+          expect(subject.author_name).to eq 'Hoge'
+        end
+      end
+    end
+
+    context 'but the first tag is invalid JSON' do
+      let(:html) { <<-HTML }
+<!doctype html>
+<html>
+<body>
+  <script type="application/ld+json">
+    {
+      "@context":"https://schema.org",
+      "@type":"ItemList",
+      "url":"https://example.com/page.html",
+      "name":"Foo",
+      "description":"Bar"
+    },
+    {
+      "@context": "https://schema.org",
+      "@type": "BreadcrumbList",
+      "itemListElement":[
+        {
+          "@type":"ListItem",
+          "position":1,
+          "item":{
+            "@id":"https://www.example.com",
+            "name":"Baz"
+          }
+        }
+      ]
+    }
+  </script>
+  <script type="application/ld+json">
+    {
+      "@context":"https://schema.org",
+      "@type":"NewsArticle",
+      "mainEntityOfPage": {
+        "@type":"WebPage",
+        "@id": "http://example.com/page.html"
+      },
+      "headline": "Foo",
+      "description": "Bar",
+      "datePublished": "2022-01-31T19:46:00+00:00",
+      "author": {
+        "@type": "Organization",
+        "name": "Hoge"
+      },
+      "publisher": {
+        "@type": "NewsMediaOrganization",
+        "name":"Baz",
+        "url":"https://example.com/"
+      }
+    }
+  </script>
+</body>
+</html>
+      HTML
+
+      describe '#title' do
+        it 'returns the title from structured data' do
+          expect(subject.title).to eq 'Foo'
+        end
+      end
+
+      describe '#description' do
+        it 'returns the description from structured data' do
+          expect(subject.description).to eq 'Bar'
+        end
+      end
+
+      describe '#provider_name' do
+        it 'returns the provider name from structured data' do
+          expect(subject.provider_name).to eq 'Baz'
+        end
+      end
+
+      describe '#author_name' do
+        it 'returns the author name from structured data' do
+          expect(subject.author_name).to eq 'Hoge'
+        end
+      end
+    end
+  end
 end