diff options
Diffstat (limited to 'app')
-rw-r--r-- | app/controllers/activitypub/replies_controller.rb | 32 | ||||
-rw-r--r-- | app/controllers/concerns/localized.rb | 29 | ||||
-rw-r--r-- | app/helpers/languages_helper.rb | 291 | ||||
-rw-r--r-- | app/helpers/settings_helper.rb | 2 | ||||
-rw-r--r-- | app/lib/activitypub/activity/create.rb | 6 | ||||
-rw-r--r-- | app/lib/language_detector.rb | 101 | ||||
-rw-r--r-- | app/lib/link_details_extractor.rb | 62 | ||||
-rw-r--r-- | app/models/account_suggestions/global_source.rb | 2 | ||||
-rw-r--r-- | app/models/user.rb | 4 | ||||
-rw-r--r-- | app/services/activitypub/process_status_update_service.rb | 6 | ||||
-rw-r--r-- | app/services/post_status_service.rb | 7 | ||||
-rw-r--r-- | app/validators/import_validator.rb | 2 | ||||
-rw-r--r-- | app/views/admin/follow_recommendations/show.html.haml | 2 | ||||
-rw-r--r-- | app/views/settings/preferences/other/show.html.haml | 4 | ||||
-rw-r--r-- | app/workers/activitypub/processing_worker.rb | 5 | ||||
-rw-r--r-- | app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb | 2 | ||||
-rw-r--r-- | app/workers/scheduler/follow_recommendations_scheduler.rb | 29 |
17 files changed, 335 insertions, 251 deletions
diff --git a/app/controllers/activitypub/replies_controller.rb b/app/controllers/activitypub/replies_controller.rb index fde6c861f..4ff7cfa08 100644 --- a/app/controllers/activitypub/replies_controller.rb +++ b/app/controllers/activitypub/replies_controller.rb @@ -63,15 +63,29 @@ class ActivityPub::RepliesController < ActivityPub::BaseController end def next_page - only_other_accounts = !(@replies&.last&.account_id == @account.id && @replies.size == DESCENDANTS_LIMIT) - - account_status_replies_url( - @account, - @status, - page: true, - min_id: only_other_accounts && !only_other_accounts? ? nil : @replies&.last&.id, - only_other_accounts: only_other_accounts - ) + if only_other_accounts? + # Only consider remote accounts + return nil if @replies.size < DESCENDANTS_LIMIT + + account_status_replies_url( + @account, + @status, + page: true, + min_id: @replies&.last&.id, + only_other_accounts: true + ) + else + # For now, we're serving only self-replies, but next page might be other accounts + next_only_other_accounts = @replies&.last&.account_id != @account.id || @replies.size < DESCENDANTS_LIMIT + + account_status_replies_url( + @account, + @status, + page: true, + min_id: next_only_other_accounts ? nil : @replies&.last&.id, + only_other_accounts: next_only_other_accounts + ) + end end def page_params diff --git a/app/controllers/concerns/localized.rb b/app/controllers/concerns/localized.rb index fe1142f34..f7b62f09c 100644 --- a/app/controllers/concerns/localized.rb +++ b/app/controllers/concerns/localized.rb @@ -7,27 +7,24 @@ module Localized around_action :set_locale end - def set_locale - locale = current_user.locale if respond_to?(:user_signed_in?) && user_signed_in? - locale ||= session[:locale] ||= default_locale - locale = default_locale unless I18n.available_locales.include?(locale.to_sym) - - I18n.with_locale(locale) do - yield - end + def set_locale(&block) + I18n.with_locale(requested_locale || I18n.default_locale, &block) end private - def default_locale - if ENV['DEFAULT_LOCALE'].present? - I18n.default_locale - else - request_locale || I18n.default_locale - end + def requested_locale + requested_locale_name = available_locale_or_nil(params[:locale]) + requested_locale_name ||= available_locale_or_nil(current_user.locale) if respond_to?(:user_signed_in?) && user_signed_in? + requested_locale_name ||= http_accept_language if ENV['DEFAULT_LOCALE'].blank? + requested_locale_name + end + + def http_accept_language + HttpAcceptLanguage::Parser.new(request.headers.fetch('Accept-Language')).language_region_compatible_from(I18n.available_locales) if request.headers.key?('Accept-Language') end - def request_locale - http_accept_language.language_region_compatible_from(I18n.available_locales) + def available_locale_or_nil(locale_name) + locale_name.to_sym if locale_name.present? && I18n.available_locales.include?(locale_name.to_sym) end end diff --git a/app/helpers/languages_helper.rb b/app/helpers/languages_helper.rb index 730724208..f3ed7b314 100644 --- a/app/helpers/languages_helper.rb +++ b/app/helpers/languages_helper.rb @@ -1,94 +1,237 @@ # frozen_string_literal: true module LanguagesHelper - HUMAN_LOCALES = { - af: 'Afrikaans', - ar: 'العربية', - ast: 'Asturianu', - bg: 'Български', - bn: 'বাংলা', - br: 'Breton', - ca: 'Català', - co: 'Corsu', - cs: 'Čeština', - cy: 'Cymraeg', - da: 'Dansk', - de: 'Deutsch', - el: 'Ελληνικά', - en: 'English', - eo: 'Esperanto', + ISO_639_1 = { + aa: ['Afar', 'Afaraf'].freeze, + ab: ['Abkhaz', 'аҧсуа бызшәа'].freeze, + ae: ['Avestan', 'avesta'].freeze, + af: ['Afrikaans', 'Afrikaans'].freeze, + ak: ['Akan', 'Akan'].freeze, + am: ['Amharic', 'አማርኛ'].freeze, + an: ['Aragonese', 'aragonés'].freeze, + ar: ['Arabic', 'اللغة العربية'].freeze, + as: ['Assamese', 'অসমীয়া'].freeze, + av: ['Avaric', 'авар мацӀ'].freeze, + ay: ['Aymara', 'aymar aru'].freeze, + az: ['Azerbaijani', 'azərbaycan dili'].freeze, + ba: ['Bashkir', 'башҡорт теле'].freeze, + be: ['Belarusian', 'беларуская мова'].freeze, + bg: ['Bulgarian', 'български език'].freeze, + bh: ['Bihari', 'भोजपुरी'].freeze, + bi: ['Bislama', 'Bislama'].freeze, + bm: ['Bambara', 'bamanankan'].freeze, + bn: ['Bengali', 'বাংলা'].freeze, + bo: ['Tibetan', 'བོད་ཡིག'].freeze, + br: ['Breton', 'brezhoneg'].freeze, + bs: ['Bosnian', 'bosanski jezik'].freeze, + ca: ['Catalan', 'Català'].freeze, + ce: ['Chechen', 'нохчийн мотт'].freeze, + ch: ['Chamorro', 'Chamoru'].freeze, + co: ['Corsican', 'corsu'].freeze, + cr: ['Cree', 'ᓀᐦᐃᔭᐍᐏᐣ'].freeze, + cs: ['Czech', 'čeština'].freeze, + cu: ['Old Church Slavonic', 'ѩзыкъ словѣньскъ'].freeze, + cv: ['Chuvash', 'чӑваш чӗлхи'].freeze, + cy: ['Welsh', 'Cymraeg'].freeze, + da: ['Danish', 'dansk'].freeze, + de: ['German', 'Deutsch'].freeze, + dv: ['Divehi', 'Dhivehi'].freeze, + dz: ['Dzongkha', 'རྫོང་ཁ'].freeze, + ee: ['Ewe', 'Eʋegbe'].freeze, + el: ['Greek', 'Ελληνικά'].freeze, + en: ['English', 'English'].freeze, + eo: ['Esperanto', 'Esperanto'].freeze, + es: ['Spanish', 'Español'].freeze, + et: ['Estonian', 'eesti'].freeze, + eu: ['Basque', 'euskara'].freeze, + fa: ['Persian', 'فارسی'].freeze, + ff: ['Fula', 'Fulfulde'].freeze, + fi: ['Finnish', 'suomi'].freeze, + fj: ['Fijian', 'Vakaviti'].freeze, + fo: ['Faroese', 'føroyskt'].freeze, + fr: ['French', 'Français'].freeze, + fy: ['Western Frisian', 'Frysk'].freeze, + ga: ['Irish', 'Gaeilge'].freeze, + gd: ['Scottish Gaelic', 'Gàidhlig'].freeze, + gl: ['Galician', 'galego'].freeze, + gu: ['Gujarati', 'ગુજરાતી'].freeze, + gv: ['Manx', 'Gaelg'].freeze, + ha: ['Hausa', 'هَوُسَ'].freeze, + he: ['Hebrew', 'עברית'].freeze, + hi: ['Hindi', 'हिन्दी'].freeze, + ho: ['Hiri Motu', 'Hiri Motu'].freeze, + hr: ['Croatian', 'Hrvatski'].freeze, + ht: ['Haitian', 'Kreyòl ayisyen'].freeze, + hu: ['Hungarian', 'magyar'].freeze, + hy: ['Armenian', 'Հայերեն'].freeze, + hz: ['Herero', 'Otjiherero'].freeze, + ia: ['Interlingua', 'Interlingua'].freeze, + id: ['Indonesian', 'Bahasa Indonesia'].freeze, + ie: ['Interlingue', 'Interlingue'].freeze, + ig: ['Igbo', 'Asụsụ Igbo'].freeze, + ii: ['Nuosu', 'ꆈꌠ꒿ Nuosuhxop'].freeze, + ik: ['Inupiaq', 'Iñupiaq'].freeze, + io: ['Ido', 'Ido'].freeze, + is: ['Icelandic', 'Íslenska'].freeze, + it: ['Italian', 'Italiano'].freeze, + iu: ['Inuktitut', 'ᐃᓄᒃᑎᑐᑦ'].freeze, + ja: ['Japanese', '日本語'].freeze, + jv: ['Javanese', 'basa Jawa'].freeze, + ka: ['Georgian', 'ქართული'].freeze, + kg: ['Kongo', 'Kikongo'].freeze, + ki: ['Kikuyu', 'Gĩkũyũ'].freeze, + kj: ['Kwanyama', 'Kuanyama'].freeze, + kk: ['Kazakh', 'қазақ тілі'].freeze, + kl: ['Kalaallisut', 'kalaallisut'].freeze, + km: ['Khmer', 'ខេមរភាសា'].freeze, + kn: ['Kannada', 'ಕನ್ನಡ'].freeze, + ko: ['Korean', '한국어'].freeze, + kr: ['Kanuri', 'Kanuri'].freeze, + ks: ['Kashmiri', 'कश्मीरी'].freeze, + ku: ['Kurdish', 'Kurdî'].freeze, + kv: ['Komi', 'коми кыв'].freeze, + kw: ['Cornish', 'Kernewek'].freeze, + ky: ['Kyrgyz', 'Кыргызча'].freeze, + la: ['Latin', 'latine'].freeze, + lb: ['Luxembourgish', 'Lëtzebuergesch'].freeze, + lg: ['Ganda', 'Luganda'].freeze, + li: ['Limburgish', 'Limburgs'].freeze, + ln: ['Lingala', 'Lingála'].freeze, + lo: ['Lao', 'ພາສາ'].freeze, + lt: ['Lithuanian', 'lietuvių kalba'].freeze, + lu: ['Luba-Katanga', 'Tshiluba'].freeze, + lv: ['Latvian', 'latviešu valoda'].freeze, + mg: ['Malagasy', 'fiteny malagasy'].freeze, + mh: ['Marshallese', 'Kajin M̧ajeļ'].freeze, + mi: ['Māori', 'te reo Māori'].freeze, + mk: ['Macedonian', 'македонски јазик'].freeze, + ml: ['Malayalam', 'മലയാളം'].freeze, + mn: ['Mongolian', 'Монгол хэл'].freeze, + mr: ['Marathi', 'मराठी'].freeze, + ms: ['Malay', 'Bahasa Malaysia'].freeze, + mt: ['Maltese', 'Malti'].freeze, + my: ['Burmese', 'ဗမာစာ'].freeze, + na: ['Nauru', 'Ekakairũ Naoero'].freeze, + nb: ['Norwegian Bokmål', 'Norsk bokmål'].freeze, + nd: ['Northern Ndebele', 'isiNdebele'].freeze, + ne: ['Nepali', 'नेपाली'].freeze, + ng: ['Ndonga', 'Owambo'].freeze, + nl: ['Dutch', 'Nederlands'].freeze, + nn: ['Norwegian Nynorsk', 'Norsk nynorsk'].freeze, + no: ['Norwegian', 'Norsk'].freeze, + nr: ['Southern Ndebele', 'isiNdebele'].freeze, + nv: ['Navajo', 'Diné bizaad'].freeze, + ny: ['Chichewa', 'chiCheŵa'].freeze, + oc: ['Occitan', 'occitan'].freeze, + oj: ['Ojibwe', 'ᐊᓂᔑᓈᐯᒧᐎᓐ'].freeze, + om: ['Oromo', 'Afaan Oromoo'].freeze, + or: ['Oriya', 'ଓଡ଼ିଆ'].freeze, + os: ['Ossetian', 'ирон æвзаг'].freeze, + pa: ['Panjabi', 'ਪੰਜਾਬੀ'].freeze, + pi: ['Pāli', 'पाऴि'].freeze, + pl: ['Polish', 'Polski'].freeze, + ps: ['Pashto', 'پښتو'].freeze, + pt: ['Portuguese', 'Português'].freeze, + qu: ['Quechua', 'Runa Simi'].freeze, + rm: ['Romansh', 'rumantsch grischun'].freeze, + rn: ['Kirundi', 'Ikirundi'].freeze, + ro: ['Romanian', 'Română'].freeze, + ru: ['Russian', 'Русский'].freeze, + rw: ['Kinyarwanda', 'Ikinyarwanda'].freeze, + sa: ['Sanskrit', 'संस्कृतम्'].freeze, + sc: ['Sardinian', 'sardu'].freeze, + sd: ['Sindhi', 'सिन्धी'].freeze, + se: ['Northern Sami', 'Davvisámegiella'].freeze, + sg: ['Sango', 'yângâ tî sängö'].freeze, + si: ['Sinhala', 'සිංහල'].freeze, + sk: ['Slovak', 'slovenčina'].freeze, + sl: ['Slovenian', 'slovenščina'].freeze, + sn: ['Shona', 'chiShona'].freeze, + so: ['Somali', 'Soomaaliga'].freeze, + sq: ['Albanian', 'Shqip'].freeze, + sr: ['Serbian', 'српски језик'].freeze, + ss: ['Swati', 'SiSwati'].freeze, + st: ['Southern Sotho', 'Sesotho'].freeze, + su: ['Sundanese', 'Basa Sunda'].freeze, + sv: ['Swedish', 'Svenska'].freeze, + sw: ['Swahili', 'Kiswahili'].freeze, + ta: ['Tamil', 'தமிழ்'].freeze, + te: ['Telugu', 'తెలుగు'].freeze, + tg: ['Tajik', 'тоҷикӣ'].freeze, + th: ['Thai', 'ไทย'].freeze, + ti: ['Tigrinya', 'ትግርኛ'].freeze, + tk: ['Turkmen', 'Türkmen'].freeze, + tl: ['Tagalog', 'Wikang Tagalog'].freeze, + tn: ['Tswana', 'Setswana'].freeze, + to: ['Tonga', 'faka Tonga'].freeze, + tr: ['Turkish', 'Türkçe'].freeze, + ts: ['Tsonga', 'Xitsonga'].freeze, + tt: ['Tatar', 'татар теле'].freeze, + tw: ['Twi', 'Twi'].freeze, + ty: ['Tahitian', 'Reo Tahiti'].freeze, + ug: ['Uyghur', 'ئۇيغۇرچە'].freeze, + uk: ['Ukrainian', 'Українська'].freeze, + ur: ['Urdu', 'اردو'].freeze, + uz: ['Uzbek', 'Ўзбек'].freeze, + ve: ['Venda', 'Tshivenḓa'].freeze, + vi: ['Vietnamese', 'Tiếng Việt'].freeze, + vo: ['Volapük', 'Volapük'].freeze, + wa: ['Walloon', 'walon'].freeze, + wo: ['Wolof', 'Wollof'].freeze, + xh: ['Xhosa', 'isiXhosa'].freeze, + yi: ['Yiddish', 'ייִדיש'].freeze, + yo: ['Yoruba', 'Yorùbá'].freeze, + za: ['Zhuang', 'Saɯ cueŋƅ'].freeze, + zh: ['Chinese', '中文'].freeze, + zu: ['Zulu', 'isiZulu'].freeze, + }.freeze + + ISO_639_3 = { + ast: ['Asturian', 'Asturianu'].freeze, + kab: ['Kabyle', 'Taqbaylit'].freeze, + kmr: ['Northern Kurdish', 'Kurmancî'].freeze, + zgh: ['Standard Moroccan Tamazight', 'ⵜⴰⵎⴰⵣⵉⵖⵜ'].freeze, + }.freeze + + SUPPORTED_LOCALES = {}.merge(ISO_639_1).merge(ISO_639_3).freeze + + # For ISO-639-1 and ISO-639-3 language codes, we have their official + # names, but for some translations, we need the names of the + # regional variants specifically + REGIONAL_LOCALE_NAMES = { 'es-AR': 'Español (Argentina)', 'es-MX': 'Español (México)', - es: 'Español', - et: 'Eesti', - eu: 'Euskara', - fa: 'فارسی', - fi: 'Suomi', - fr: 'Français', - ga: 'Gaeilge', - gd: 'Gàidhlig', - gl: 'Galego', - he: 'עברית', - hi: 'हिन्दी', - hr: 'Hrvatski', - hu: 'Magyar', - hy: 'Հայերեն', - id: 'Bahasa Indonesia', - io: 'Ido', - is: 'Íslenska', - it: 'Italiano', - ja: '日本語', - ka: 'ქართული', - kab: 'Taqbaylit', - kk: 'Қазақша', - kmr: 'Kurmancî', - kn: 'ಕನ್ನಡ', - ko: '한국어', - ku: 'سۆرانی', - lt: 'Lietuvių', - lv: 'Latviešu', - mk: 'Македонски', - ml: 'മലയാളം', - mr: 'मराठी', - ms: 'Bahasa Melayu', - nl: 'Nederlands', - nn: 'Nynorsk', - no: 'Norsk', - oc: 'Occitan', - pl: 'Polski', 'pt-BR': 'Português (Brasil)', 'pt-PT': 'Português (Portugal)', - pt: 'Português', - ro: 'Română', - ru: 'Русский', - sa: 'संस्कृतम्', - sc: 'Sardu', - si: 'සිංහල', - sk: 'Slovenčina', - sl: 'Slovenščina', - sq: 'Shqip', 'sr-Latn': 'Srpski (latinica)', - sr: 'Српски', - sv: 'Svenska', - ta: 'தமிழ்', - te: 'తెలుగు', - th: 'ไทย', - tr: 'Türkçe', - uk: 'Українська', - ur: 'اُردُو', - vi: 'Tiếng Việt', - zgh: 'ⵜⴰⵎⴰⵣⵉⵖⵜ', 'zh-CN': '简体中文', 'zh-HK': '繁體中文(香港)', 'zh-TW': '繁體中文(臺灣)', - zh: '中文', }.freeze def human_locale(locale) if locale == 'und' I18n.t('generic.none') + elsif (supported_locale = SUPPORTED_LOCALES[locale.to_sym]) + supported_locale[1] + elsif (regional_locale = REGIONAL_LOCALE_NAMES[locale.to_sym]) + regional_locale else - HUMAN_LOCALES[locale.to_sym] || locale + locale end end + + def valid_locale_or_nil(str) + return if str.blank? + + code, = str.to_s.split(/[_-]/) # Strip out the region from e.g. en_US or ja-JP + + return unless valid_locale?(code) + + code + end + + def valid_locale?(locale) + SUPPORTED_LOCALES.key?(locale.to_sym) + end end diff --git a/app/helpers/settings_helper.rb b/app/helpers/settings_helper.rb index 23739d1cd..3d5592867 100644 --- a/app/helpers/settings_helper.rb +++ b/app/helpers/settings_helper.rb @@ -2,7 +2,7 @@ module SettingsHelper def filterable_languages - LanguageDetector.instance.language_names.select(&LanguagesHelper::HUMAN_LOCALES.method(:key?)) + LanguagesHelper::SUPPORTED_LOCALES.keys end def hash_to_object(hash) diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index ad273c20b..cf31b6ff6 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -112,7 +112,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity url: @status_parser.url || @status_parser.uri, account: @account, text: converted_object_type? ? converted_text : (@status_parser.text || ''), - language: @status_parser.language || detected_language, + language: @status_parser.language, spoiler_text: converted_object_type? ? '' : (@status_parser.spoiler_text || ''), created_at: @status_parser.created_at, edited_at: @status_parser.edited_at, @@ -370,10 +370,6 @@ class ActivityPub::Activity::Create < ActivityPub::Activity Formatter.instance.linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n")) end - def detected_language - LanguageDetector.instance.detect(@status_parser.text, @account) if supported_object_type? - end - def unsupported_media_type?(mime_type) mime_type.present? && !MediaAttachment.supported_mime_types.include?(mime_type) end diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb deleted file mode 100644 index 40452eddc..000000000 --- a/app/lib/language_detector.rb +++ /dev/null @@ -1,101 +0,0 @@ -# frozen_string_literal: true - -class LanguageDetector - include Singleton - - WORDS_THRESHOLD = 4 - RELIABLE_CHARACTERS_RE = /[\p{Hebrew}\p{Arabic}\p{Syriac}\p{Thaana}\p{Nko}\p{Han}\p{Katakana}\p{Hiragana}\p{Hangul}\p{Thai}]+/m - - def initialize - @identifier = CLD3::NNetLanguageIdentifier.new(1, 2048) - end - - def detect(text, account) - input_text = prepare_text(text) - - return if input_text.blank? - - detect_language_code(input_text) || default_locale(account) - end - - def language_names - @language_names = CLD3::TaskContextParams::LANGUAGE_NAMES.map { |name| iso6391(name.to_s).to_sym }.uniq - end - - private - - def prepare_text(text) - simplify_text(text).strip - end - - def unreliable_input?(text) - !reliable_input?(text) - end - - def reliable_input?(text) - sufficient_text_length?(text) || language_specific_character_set?(text) - end - - def sufficient_text_length?(text) - text.split(/\s+/).size >= WORDS_THRESHOLD - end - - def language_specific_character_set?(text) - words = text.scan(RELIABLE_CHARACTERS_RE) - - if words.present? - words.reduce(0) { |acc, elem| acc + elem.size }.to_f / text.size > 0.3 - else - false - end - end - - def detect_language_code(text) - return if unreliable_input?(text) - - result = @identifier.find_language(text) - - iso6391(result.language.to_s).to_sym if result&.reliable? - end - - def iso6391(bcp47) - iso639 = bcp47.split('-').first - - # CLD3 returns grandfathered language code for Hebrew - return 'he' if iso639 == 'iw' - - ISO_639.find(iso639).alpha2 - end - - def simplify_text(text) - new_text = remove_html(text) - new_text.gsub!(FetchLinkCardService::URL_PATTERN, '\1') - new_text.gsub!(Account::MENTION_RE, '') - new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase } - new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '') - new_text.gsub!(/\s+/, ' ') - new_text - end - - def new_scrubber - scrubber = Rails::Html::PermitScrubber.new - scrubber.tags = %w(br p) - scrubber - end - - def scrubber - @scrubber ||= new_scrubber - end - - def remove_html(text) - text = Loofah.fragment(text).scrub!(scrubber).to_s - text.gsub!('<br>', "\n") - text.gsub!('</p><p>', "\n\n") - text.gsub!(/(^<p>|<\/p>$)/, '') - text - end - - def default_locale(account) - account.user_locale&.to_sym || I18n.default_locale if account.local? - end -end diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb index 56ad0717b..fabbd244d 100644 --- a/app/lib/link_details_extractor.rb +++ b/app/lib/link_details_extractor.rb @@ -2,6 +2,20 @@ class LinkDetailsExtractor include ActionView::Helpers::TagHelper + include LanguagesHelper + + # Some publications wrap their JSON-LD data in their <script> tags + # in commented-out CDATA blocks, they need to be removed before + # attempting to parse JSON + CDATA_JUNK_PATTERN = %r{^[\s]*( + (/\*[\s]*<!\[CDATA\[[\s]*\*/) # Block comment style opening + | + (//[\s]*<!\[CDATA\[) # Single-line comment style opening + | + (/\*[\s]*\]\]>[\s]*\*/) # Block comment style closing + | + (//[\s]*\]\]>) # Single-line comment style closing + )[\s]*$}x class StructuredData SUPPORTED_TYPES = %w( @@ -61,6 +75,10 @@ class LinkDetailsExtractor publisher.dig('logo', 'url') end + def valid? + json.present? + end + private def author @@ -134,11 +152,11 @@ class LinkDetailsExtractor end def title - structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first + html_entities.decode(structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first) end def description - structured_data&.description || opengraph_tag('og:description') || meta_tag('description') + html_entities.decode(structured_data&.description || opengraph_tag('og:description') || meta_tag('description')) end def image @@ -146,11 +164,11 @@ class LinkDetailsExtractor end def canonical_url - valid_url_or_nil(opengraph_tag('og:url') || link_tag('canonical'), same_origin_only: true) || @original_url.to_s + valid_url_or_nil(link_tag('canonical') || opengraph_tag('og:url'), same_origin_only: true) || @original_url.to_s end def provider_name - structured_data&.publisher_name || opengraph_tag('og:site_name') + html_entities.decode(structured_data&.publisher_name || opengraph_tag('og:site_name')) end def provider_url @@ -158,7 +176,7 @@ class LinkDetailsExtractor end def author_name - structured_data&.author_name || opengraph_tag('og:author') || opengraph_tag('og:author:username') + html_entities.decode(structured_data&.author_name || opengraph_tag('og:author') || opengraph_tag('og:author:username')) end def author_url @@ -201,14 +219,6 @@ class LinkDetailsExtractor nil end - def valid_locale_or_nil(str) - return nil if str.blank? - - code, = str.split(/_-/) # Strip out the region from e.g. en_US or ja-JA - locale = ISO_639.find(code) - locale&.alpha2 - end - def link_tag(name) document.xpath("//link[@rel=\"#{name}\"]").map { |link| link['href'] }.first end @@ -223,10 +233,24 @@ class LinkDetailsExtractor def structured_data @structured_data ||= begin - json_ld = document.xpath('//script[@type="application/ld+json"]').map(&:content).first - json_ld.present? ? StructuredData.new(json_ld) : nil - rescue Oj::ParseError - nil + # Some publications have more than one JSON-LD definition on the page, + # and some of those definitions aren't valid JSON either, so we have + # to loop through here until we find something that is the right type + # and doesn't break + document.xpath('//script[@type="application/ld+json"]').filter_map do |element| + json_ld = element.content&.gsub(CDATA_JUNK_PATTERN, '') + + next if json_ld.blank? + + structured_data = StructuredData.new(html_entities.decode(json_ld)) + + next unless structured_data.valid? + + structured_data + rescue Oj::ParseError, EncodingError + Rails.logger.debug("Invalid JSON-LD in #{@original_url}") + next + end.first end end @@ -246,4 +270,8 @@ class LinkDetailsExtractor detector.strip_tags = true end end + + def html_entities + @html_entities ||= HTMLEntities.new + end end diff --git a/app/models/account_suggestions/global_source.rb b/app/models/account_suggestions/global_source.rb index ac764de50..03ed1b6c2 100644 --- a/app/models/account_suggestions/global_source.rb +++ b/app/models/account_suggestions/global_source.rb @@ -6,7 +6,7 @@ class AccountSuggestions::GlobalSource < AccountSuggestions::Source end def get(account, skip_account_ids: [], limit: 40) - account_ids = account_ids_for_locale(account.user_locale) - [account.id] - skip_account_ids + account_ids = account_ids_for_locale(I18n.locale.to_str.split(/[_-]/).first) - [account.id] - skip_account_ids as_ordered_suggestions( scope(account).where(id: account_ids), diff --git a/app/models/user.rb b/app/models/user.rb index 9afdc481d..ee20e293e 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -245,6 +245,10 @@ class User < ApplicationRecord save! end + def preferred_posting_language + settings.default_language || locale + end + def setting_default_privacy settings.default_privacy || (account.locked? ? 'private' : 'public') end diff --git a/app/services/activitypub/process_status_update_service.rb b/app/services/activitypub/process_status_update_service.rb index 977928127..b1cea1cdf 100644 --- a/app/services/activitypub/process_status_update_service.rb +++ b/app/services/activitypub/process_status_update_service.rb @@ -120,7 +120,7 @@ class ActivityPub::ProcessStatusUpdateService < BaseService @status.text = @status_parser.text || '' @status.spoiler_text = @status_parser.spoiler_text || '' @status.sensitive = @account.sensitized? || @status_parser.sensitive || false - @status.language = @status_parser.language || detected_language + @status.language = @status_parser.language @status.edited_at = @status_parser.edited_at || Time.now.utc if significant_changes? @status.save! @@ -210,10 +210,6 @@ class ActivityPub::ProcessStatusUpdateService < BaseService { redis: Redis.current, key: "create:#{@uri}", autorelease: 15.minutes.seconds } end - def detected_language - LanguageDetector.instance.detect(@status_parser.text, @account) - end - def create_previous_edit! # We only need to create a previous edit when no previous edits exist, e.g. # when the status has never been edited. For other cases, we always create diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb index 9d26e0f5b..c5061dd63 100644 --- a/app/services/post_status_service.rb +++ b/app/services/post_status_service.rb @@ -2,6 +2,7 @@ class PostStatusService < BaseService include Redisable + include LanguagesHelper MIN_SCHEDULE_OFFSET = 5.minutes.freeze @@ -118,10 +119,6 @@ class PostStatusService < BaseService raise Mastodon::ValidationError, I18n.t('media_attachments.validations.not_ready') if @media.any?(&:not_processed?) end - def language_from_option(str) - ISO_639.find(str)&.alpha2 - end - def process_mentions_service ProcessMentionsService.new end @@ -174,7 +171,7 @@ class PostStatusService < BaseService sensitive: @sensitive, spoiler_text: @options[:spoiler_text] || '', visibility: @visibility, - language: language_from_option(@options[:language]) || @account.user&.setting_default_language&.presence || LanguageDetector.instance.detect(@text, @account), + language: valid_locale_or_nil(@options[:language].presence || @account.user&.preferred_posting_language || I18n.default_locale), application: @options[:application], content_type: @options[:content_type] || @account.user&.setting_default_content_type, rate_limit: @options[:with_rate_limit], diff --git a/app/validators/import_validator.rb b/app/validators/import_validator.rb index a182abfa5..9f19aee2a 100644 --- a/app/validators/import_validator.rb +++ b/app/validators/import_validator.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'csv' + class ImportValidator < ActiveModel::Validator KNOWN_HEADERS = [ 'Account address', diff --git a/app/views/admin/follow_recommendations/show.html.haml b/app/views/admin/follow_recommendations/show.html.haml index 2878f07d7..d1c160bd2 100644 --- a/app/views/admin/follow_recommendations/show.html.haml +++ b/app/views/admin/follow_recommendations/show.html.haml @@ -10,7 +10,7 @@ .filter-subset.filter-subset--with-select %strong= t('admin.follow_recommendations.language') .input.select.optional - = select_tag :language, options_for_select(I18n.available_locales.map { |key| [human_locale(key), key]}, @language) + = select_tag :language, options_for_select(I18n.available_locales.map { |key| key.to_s.split(/[_-]/).first.to_sym }.uniq.map { |key| [human_locale(key), key]}, @language) .filter-subset %strong= t('admin.follow_recommendations.status') diff --git a/app/views/settings/preferences/other/show.html.haml b/app/views/settings/preferences/other/show.html.haml index 3b5c7016d..76ff2bcbc 100644 --- a/app/views/settings/preferences/other/show.html.haml +++ b/app/views/settings/preferences/other/show.html.haml @@ -27,7 +27,7 @@ = f.input :setting_default_privacy, collection: Status.selectable_visibilities, wrapper: :with_label, include_blank: false, label_method: lambda { |visibility| safe_join([I18n.t("statuses.visibilities.#{visibility}"), I18n.t("statuses.visibilities.#{visibility}_long")], ' - ') }, required: false, hint: false .fields-group.fields-row__column.fields-row__column-6 - = f.input :setting_default_language, collection: [nil] + filterable_languages.sort, wrapper: :with_label, label_method: lambda { |locale| locale.nil? ? I18n.t('statuses.language_detection') : human_locale(locale) }, required: false, include_blank: false, hint: false + = f.input :setting_default_language, collection: [nil] + filterable_languages, wrapper: :with_label, label_method: lambda { |locale| locale.nil? ? I18n.t('statuses.default_language') : human_locale(locale) }, required: false, include_blank: false, hint: false .fields-group = f.input :setting_default_sensitive, as: :boolean, wrapper: :with_label @@ -41,7 +41,7 @@ %h4= t 'preferences.public_timelines' .fields-group - = f.input :chosen_languages, collection: filterable_languages.sort, wrapper: :with_block_label, include_blank: false, label_method: lambda { |locale| human_locale(locale) }, required: false, as: :check_boxes, collection_wrapper_tag: 'ul', item_wrapper_tag: 'li' + = f.input :chosen_languages, collection: filterable_languages, wrapper: :with_block_label, include_blank: false, label_method: lambda { |locale| human_locale(locale) }, required: false, as: :check_boxes, collection_wrapper_tag: 'ul', item_wrapper_tag: 'li' .actions = f.button :button, t('generic.save_changes'), type: :submit diff --git a/app/workers/activitypub/processing_worker.rb b/app/workers/activitypub/processing_worker.rb index cef595319..37e316354 100644 --- a/app/workers/activitypub/processing_worker.rb +++ b/app/workers/activitypub/processing_worker.rb @@ -6,7 +6,10 @@ class ActivityPub::ProcessingWorker sidekiq_options backtrace: true, retry: 8 def perform(account_id, body, delivered_to_account_id = nil) - ActivityPub::ProcessCollectionService.new.call(body, Account.find(account_id), override_timestamps: true, delivered_to_account_id: delivered_to_account_id, delivery: true) + account = Account.find_by(id: account_id) + return if account.nil? + + ActivityPub::ProcessCollectionService.new.call(body, account, override_timestamps: true, delivered_to_account_id: delivered_to_account_id, delivery: true) rescue ActiveRecord::RecordInvalid => e Rails.logger.debug "Error processing incoming ActivityPub object: #{e}" end diff --git a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb index f42d4bca6..7195f0ff9 100644 --- a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb +++ b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb @@ -66,7 +66,7 @@ class Scheduler::AccountsStatusesCleanupScheduler end def compute_budget - threads = Sidekiq::ProcessSet.new.filter { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum + threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum [PER_THREAD_BUDGET * threads, MAX_BUDGET].min end diff --git a/app/workers/scheduler/follow_recommendations_scheduler.rb b/app/workers/scheduler/follow_recommendations_scheduler.rb index effc63e59..084619cbd 100644 --- a/app/workers/scheduler/follow_recommendations_scheduler.rb +++ b/app/workers/scheduler/follow_recommendations_scheduler.rb @@ -16,28 +16,33 @@ class Scheduler::FollowRecommendationsScheduler AccountSummary.refresh FollowRecommendation.refresh - fallback_recommendations = FollowRecommendation.order(rank: :desc).limit(SET_SIZE).index_by(&:account_id) + fallback_recommendations = FollowRecommendation.order(rank: :desc).limit(SET_SIZE) - I18n.available_locales.each do |locale| + I18n.available_locales.map { |locale| locale.to_s.split(/[_-]/).first }.uniq.each do |locale| recommendations = begin if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist - FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).index_by(&:account_id) + FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).map { |recommendation| [recommendation.account_id, recommendation.rank] } else - {} + [] end end # Use language-agnostic results if there are not enough language-specific ones - missing = SET_SIZE - recommendations.keys.size + missing = SET_SIZE - recommendations.size + + if missing.positive? && fallback_recommendations.size.positive? + max_fallback_rank = fallback_recommendations.first.rank || 0 + + # Language-specific results should be above language-agnostic ones, + # otherwise language-agnostic ones will always overshadow them + recommendations.map! { |(account_id, rank)| [account_id, rank + max_fallback_rank] } - if missing.positive? added = 0 - # Avoid duplicate results - fallback_recommendations.each_value do |recommendation| - next if recommendations.key?(recommendation.account_id) + fallback_recommendations.each do |recommendation| + next if recommendations.any? { |(account_id, _)| account_id == recommendation.account_id } - recommendations[recommendation.account_id] = recommendation + recommendations << [recommendation.account_id, recommendation.rank] added += 1 break if added >= missing @@ -47,8 +52,8 @@ class Scheduler::FollowRecommendationsScheduler redis.pipelined do redis.del(key(locale)) - recommendations.each_value do |recommendation| - redis.zadd(key(locale), recommendation.rank, recommendation.account_id) + recommendations.each do |(account_id, rank)| + redis.zadd(key(locale), rank, account_id) end end end |