From 550ff677daef92886ab7440ad262821f4c732e85 Mon Sep 17 00:00:00 2001 From: ThibG Date: Wed, 13 Sep 2017 14:22:16 +0200 Subject: Fix ActivityPub handling of replies with WEB_DOMAIN (#4895) (#4904) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix ActivityPub handling of replies when LOCAL_DOMAIN ≠ WEB_DOMAIN (#4895) For all intents and purposes, `local_url?` is used to check if an URL refers to the Web UI or the various API endpoints of the local instances. Those things reside on `WEB_DOMAIN` and not `LOCAL_DOMAIN`. * Change local_url? spec, as all URLs handled by Mastodon are based on WEB_DOMAIN --- spec/lib/tag_manager_spec.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'spec/lib') diff --git a/spec/lib/tag_manager_spec.rb b/spec/lib/tag_manager_spec.rb index 1cd6e0a6f..6c7830231 100644 --- a/spec/lib/tag_manager_spec.rb +++ b/spec/lib/tag_manager_spec.rb @@ -63,23 +63,23 @@ RSpec.describe TagManager do describe '#local_url?' do around do |example| - original_local_domain = Rails.configuration.x.local_domain + original_web_domain = Rails.configuration.x.web_domain example.run - Rails.configuration.x.local_domain = original_local_domain + Rails.configuration.x.web_domain = original_web_domain end it 'returns true if the normalized string with port is local URL' do - Rails.configuration.x.local_domain = 'domain:42' + Rails.configuration.x.web_domain = 'domain:42' expect(TagManager.instance.local_url?('https://DoMaIn:42/')).to eq true end it 'returns true if the normalized string without port is local URL' do - Rails.configuration.x.local_domain = 'domain' + Rails.configuration.x.web_domain = 'domain' expect(TagManager.instance.local_url?('https://DoMaIn/')).to eq true end it 'returns false for string with irrelevant characters' do - Rails.configuration.x.local_domain = 'domain' + Rails.configuration.x.web_domain = 'domain' expect(TagManager.instance.local_url?('https://domainn/')).to eq false end end -- cgit From 3816943e6b5e86b22c35f3c068521f7a9007deec Mon Sep 17 00:00:00 2001 From: ふぁぼ原 Date: Fri, 15 Sep 2017 01:03:20 +0900 Subject: Enable to recognize most kinds of characters as URL paths (#4941) --- app/lib/formatter.rb | 2 +- app/services/fetch_link_card_service.rb | 14 ++++++--- config/initializers/twitter_regex.rb | 42 +++++++++++++++++++++++++++ spec/lib/formatter_spec.rb | 32 ++++++++++++++++++++ spec/services/fetch_link_card_service_spec.rb | 11 +++++++ 5 files changed, 96 insertions(+), 5 deletions(-) create mode 100644 config/initializers/twitter_regex.rb (limited to 'spec/lib') diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index cacc0364f..d9f843f44 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -131,7 +131,7 @@ class Formatter end def link_html(url) - url = Addressable::URI.parse(url).display_uri.to_s + url = Addressable::URI.parse(url).to_s prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s text = url[prefix.length, 30] suffix = url[prefix.length + 30..-1] diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 215c69fe4..4acbfae7a 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -1,9 +1,15 @@ # frozen_string_literal: true class FetchLinkCardService < BaseService - include ActionView::Helpers::TagHelper - - URL_PATTERN = %r{https?://\S+} + URL_PATTERN = %r{ + ( # $1 URL + (https?:\/\/)? # $2 Protocol (optional) + (#{Twitter::Regex[:valid_domain]}) # $3 Domain(s) + (?::(#{Twitter::Regex[:valid_port_number]}))? # $4 Port number (optional) + (/#{Twitter::Regex[:valid_url_path]}*)? # $5 URL Path and anchor + (\?#{Twitter::Regex[:valid_url_query_chars]}*#{Twitter::Regex[:valid_url_query_ending_chars]})? # $6 Query String + ) + }iox def call(status) @status = status @@ -42,7 +48,7 @@ class FetchLinkCardService < BaseService def parse_urls if @status.local? - urls = @status.text.match(URL_PATTERN).to_a.map { |uri| Addressable::URI.parse(uri).normalize } + urls = @status.text.scan(URL_PATTERN).map { |array| Addressable::URI.parse(array[0]).normalize } else html = Nokogiri::HTML(@status.text) links = html.css('a') diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb new file mode 100644 index 000000000..5a0723d24 --- /dev/null +++ b/config/initializers/twitter_regex.rb @@ -0,0 +1,42 @@ +module Twitter + class Regex + + REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou + REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]\p{Pd}_~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou + REGEXEN[:valid_url_balanced_parens] = / + \( + (?: + #{REGEXEN[:valid_general_url_path_chars]}+ + | + # allow one nested level of balanced parentheses + (?: + #{REGEXEN[:valid_general_url_path_chars]}* + \( + #{REGEXEN[:valid_general_url_path_chars]}+ + \) + #{REGEXEN[:valid_general_url_path_chars]}* + ) + ) + \) + /iox + REGEXEN[:valid_url_path] = /(?: + (?: + #{REGEXEN[:valid_general_url_path_chars]}* + (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)* + #{REGEXEN[:valid_url_path_ending_chars]} + )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/) + )/iox + REGEXEN[:valid_url] = %r{ + ( # $1 total match + (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceeding chracter + ( # $3 URL + (https?:\/\/)? # $4 Protocol (optional) + (#{REGEXEN[:valid_domain]}) # $5 Domain(s) + (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional) + (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor + (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String + ) + ) + }iox + end +end diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb index ab04ccbab..f9b7efac5 100644 --- a/spec/lib/formatter_spec.rb +++ b/spec/lib/formatter_spec.rb @@ -89,6 +89,38 @@ RSpec.describe Formatter do end end + context 'matches a URL with Japanese path string' do + let(:text) { 'https://ja.wikipedia.org/wiki/日本' } + + it 'has valid URL' do + is_expected.to include 'href="https://ja.wikipedia.org/wiki/%E6%97%A5%E6%9C%AC"' + end + end + + context 'matches a URL with Korean path string' do + let(:text) { 'https://ko.wikipedia.org/wiki/대한민국' } + + it 'has valid URL' do + is_expected.to include 'href="https://ko.wikipedia.org/wiki/%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD"' + end + end + + context 'matches a URL with Simplified Chinese path string' do + let(:text) { 'https://baike.baidu.com/item/中华人民共和国' } + + it 'has valid URL' do + is_expected.to include 'href="https://baike.baidu.com/item/%E4%B8%AD%E5%8D%8E%E4%BA%BA%E6%B0%91%E5%85%B1%E5%92%8C%E5%9B%BD"' + end + end + + context 'matches a URL with Traditional Chinese path string' do + let(:text) { 'https://zh.wikipedia.org/wiki/臺灣' } + + it 'has valid URL' do + is_expected.to include 'href="https://zh.wikipedia.org/wiki/%E8%87%BA%E7%81%A3"' + end + end + context 'contains HTML (script tag)' do let(:text) { '' } diff --git a/spec/services/fetch_link_card_service_spec.rb b/spec/services/fetch_link_card_service_spec.rb index b0aa740ac..ba61d22c3 100644 --- a/spec/services/fetch_link_card_service_spec.rb +++ b/spec/services/fetch_link_card_service_spec.rb @@ -12,6 +12,8 @@ RSpec.describe FetchLinkCardService do stub_request(:get, 'http://example.com/sjis_with_wrong_charset').to_return(request_fixture('sjis_with_wrong_charset.txt')) stub_request(:head, 'http://example.com/koi8-r').to_return(status: 200, headers: { 'Content-Type' => 'text/html' }) stub_request(:get, 'http://example.com/koi8-r').to_return(request_fixture('koi8-r.txt')) + stub_request(:head, 'http://example.com/日本語').to_return(status: 200, headers: { 'Content-Type' => 'text/html' }) + stub_request(:get, 'http://example.com/日本語').to_return(request_fixture('sjis.txt')) stub_request(:head, 'https://github.com/qbi/WannaCry').to_return(status: 404) subject.call(status) @@ -52,6 +54,15 @@ RSpec.describe FetchLinkCardService do expect(status.preview_cards.first.title).to eq("Московя начинаетъ только въ XVI ст. привлекать внимане иностранцевъ.") end end + + context do + let(:status) { Fabricate(:status, text: 'テストhttp://example.com/日本語') } + + it 'works with Japanese path string' do + expect(a_request(:get, 'http://example.com/日本語')).to have_been_made.at_least_once + expect(status.preview_cards.first.title).to eq("SJISのページ") + end + end end context 'in a remote status' do -- cgit From 48d77ea1ebd6096a6ad5e265d99fa20e7a965276 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sat, 16 Sep 2017 21:59:41 +0900 Subject: Fix filterable_languages method of SettingsHelper (#4966) --- Gemfile | 2 +- Gemfile.lock | 4 ++-- app/helpers/settings_helper.rb | 2 +- app/lib/language_detector.rb | 39 ++++++++++++++----------------- app/services/post_status_service.rb | 6 +---- spec/lib/language_detector_spec.rb | 34 +++++++++++++-------------- spec/services/post_status_service_spec.rb | 9 +++---- 7 files changed, 43 insertions(+), 53 deletions(-) (limited to 'spec/lib') diff --git a/Gemfile b/Gemfile index 051f09f78..4f4861913 100644 --- a/Gemfile +++ b/Gemfile @@ -25,7 +25,7 @@ gem 'bootsnap' gem 'browser' gem 'charlock_holmes', '~> 0.7.5' gem 'iso-639' -gem 'cld3', '~> 3.1' +gem 'cld3', '~> 3.2.0' gem 'devise', '~> 4.2' gem 'devise-two-factor', '~> 3.0' gem 'doorkeeper', '~> 4.2' diff --git a/Gemfile.lock b/Gemfile.lock index 31893dc3f..f080f15e5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -110,7 +110,7 @@ GEM activesupport charlock_holmes (0.7.5) chunky_png (1.3.8) - cld3 (3.1.3) + cld3 (3.2.0) ffi (>= 1.1.0, < 1.10.0) climate_control (0.2.0) cocaine (0.5.8) @@ -541,7 +541,7 @@ DEPENDENCIES capistrano-yarn (~> 2.0) capybara (~> 2.14) charlock_holmes (~> 0.7.5) - cld3 (~> 3.1) + cld3 (~> 3.2.0) climate_control (~> 0.2) devise (~> 4.2) devise-two-factor (~> 3.0) diff --git a/app/helpers/settings_helper.rb b/app/helpers/settings_helper.rb index 369a45680..14776b354 100644 --- a/app/helpers/settings_helper.rb +++ b/app/helpers/settings_helper.rb @@ -41,7 +41,7 @@ module SettingsHelper end def filterable_languages - I18n.available_locales.map { |locale| locale.to_s.split('-').first.to_sym }.uniq + LanguageDetector.instance.language_names.select(&HUMAN_LOCALES.method(:key?)) end def hash_to_object(hash) diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb index 1d9932b52..a42460e10 100644 --- a/app/lib/language_detector.rb +++ b/app/lib/language_detector.rb @@ -1,26 +1,31 @@ # frozen_string_literal: true class LanguageDetector - attr_reader :text, :account + include Singleton - def initialize(text, account = nil) - @text = text - @account = account + def initialize @identifier = CLD3::NNetLanguageIdentifier.new(1, 2048) end - def to_iso_s - detected_language_code || default_locale + def detect(text, account) + detect_language_code(text) || default_locale(account) end - def prepared_text - simplified_text.strip + def language_names + @language_names = + CLD3::TaskContextParams::LANGUAGE_NAMES.map { |name| iso6391(name.to_s).to_sym } + .uniq end private - def detected_language_code - iso6391(result.language).to_sym if detected_language_reliable? + def prepare_text(text) + simplify_text(text).strip + end + + def detect_language_code(text) + result = @identifier.find_language(prepare_text(text)) + iso6391(result.language.to_s).to_sym if result.reliable? end def iso6391(bcp47) @@ -32,15 +37,7 @@ class LanguageDetector ISO_639.find(iso639).alpha2 end - def result - @result ||= @identifier.find_language(prepared_text) - end - - def detected_language_reliable? - result.reliable? - end - - def simplified_text + def simplify_text(text) text.dup.tap do |new_text| new_text.gsub!(FetchLinkCardService::URL_PATTERN, '') new_text.gsub!(Account::MENTION_RE, '') @@ -49,7 +46,7 @@ class LanguageDetector end end - def default_locale - account&.user_locale&.to_sym || nil + def default_locale(account) + account.user_locale&.to_sym end end diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb index 97c55c4b2..e37cd94df 100644 --- a/app/services/post_status_service.rb +++ b/app/services/post_status_service.rb @@ -28,7 +28,7 @@ class PostStatusService < BaseService sensitive: options[:sensitive], spoiler_text: options[:spoiler_text] || '', visibility: options[:visibility] || account.user&.setting_default_privacy, - language: detect_language_for(text, account), + language: LanguageDetector.instance.detect(text, account), application: options[:application]) attach_media(status, media) @@ -69,10 +69,6 @@ class PostStatusService < BaseService media.update(status_id: status.id) end - def detect_language_for(text, account) - LanguageDetector.new(text, account).to_iso_s - end - def process_mentions_service @process_mentions_service ||= ProcessMentionsService.new end diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb index ec39cb6a0..d17026511 100644 --- a/spec/lib/language_detector_spec.rb +++ b/spec/lib/language_detector_spec.rb @@ -3,10 +3,10 @@ require 'rails_helper' describe LanguageDetector do - describe 'prepared_text' do + describe 'prepare_text' do it 'returns unmodified string without special cases' do string = 'just a regular string' - result = described_class.new(string).prepared_text + result = described_class.instance.send(:prepare_text, string) expect(result).to eq string end @@ -14,33 +14,35 @@ describe LanguageDetector do it 'collapses spacing in strings' do string = 'The formatting in this is very odd' - result = described_class.new(string).prepared_text + result = described_class.instance.send(:prepare_text, string) expect(result).to eq 'The formatting in this is very odd' end it 'strips usernames from strings before detection' do string = '@username Yeah, very surreal...! also @friend' - result = described_class.new(string).prepared_text + result = described_class.instance.send(:prepare_text, string) expect(result).to eq 'Yeah, very surreal...! also' end it 'strips URLs from strings before detection' do string = 'Our website is https://example.com and also http://localhost.dev' - result = described_class.new(string).prepared_text + result = described_class.instance.send(:prepare_text, string) expect(result).to eq 'Our website is and also' end it 'strips #hashtags from strings before detection' do string = 'Hey look at all the #animals and #fish' - result = described_class.new(string).prepared_text + result = described_class.instance.send(:prepare_text, string) expect(result).to eq 'Hey look at all the and' end end - describe 'to_iso_s' do + describe 'detect' do + let(:account_without_user_locale) { Fabricate(:user, locale: nil).account } + it 'detects english language for basic strings' do strings = [ "Hello and welcome to mastodon how are you today?", @@ -48,7 +50,7 @@ describe LanguageDetector do "a lot of people just want to feel righteous all the time and that's all that matters", ] strings.each do |string| - result = described_class.new(string).to_iso_s + result = described_class.instance.detect(string, account_without_user_locale) expect(result).to eq(:en), string end @@ -56,14 +58,14 @@ describe LanguageDetector do it 'detects spanish language' do string = 'Obtener un Hola y bienvenidos a Mastodon' - result = described_class.new(string).to_iso_s + result = described_class.instance.detect(string, account_without_user_locale) expect(result).to eq :es end describe 'when language can\'t be detected' do it 'uses nil when sent an empty document' do - result = described_class.new('').to_iso_s + result = described_class.instance.detect('', account_without_user_locale) expect(result).to eq nil end @@ -73,7 +75,7 @@ describe LanguageDetector do cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string) expect(cld_result).not_to eq :en - result = described_class.new(string).to_iso_s + result = described_class.instance.detect(string, account_without_user_locale) expect(result).to eq nil end @@ -82,14 +84,13 @@ describe LanguageDetector do describe 'with an account' do it 'uses the account locale when present' do account = double(user_locale: 'fr') - result = described_class.new('', account).to_iso_s + result = described_class.instance.detect('', account) expect(result).to eq :fr end it 'uses nil when account is present but has no locale' do - account = double(user_locale: nil) - result = described_class.new('', account).to_iso_s + result = described_class.instance.detect('', account_without_user_locale) expect(result).to eq nil end @@ -97,8 +98,7 @@ describe LanguageDetector do describe 'with an `en` default locale' do it 'uses nil for undetectable string' do - string = '' - result = described_class.new(string).to_iso_s + result = described_class.instance.detect('', account_without_user_locale) expect(result).to eq nil end @@ -114,7 +114,7 @@ describe LanguageDetector do it 'uses nil for undetectable string' do string = '' - result = described_class.new(string).to_iso_s + result = described_class.instance.detect(string, account_without_user_locale) expect(result).to eq nil end diff --git a/spec/services/post_status_service_spec.rb b/spec/services/post_status_service_spec.rb index 4182c4e1f..91902ff69 100644 --- a/spec/services/post_status_service_spec.rb +++ b/spec/services/post_status_service_spec.rb @@ -65,15 +65,12 @@ RSpec.describe PostStatusService do end it 'creates a status with a language set' do - detector = double(to_iso_s: :en) - allow(LanguageDetector).to receive(:new).and_return(detector) - account = Fabricate(:account) - text = 'test status text' + text = 'This is an English text.' - subject.call(account, text) + status = subject.call(account, text) - expect(LanguageDetector).to have_received(:new).with(text, account) + expect(status.language).to eq 'en' end it 'processes mentions' do -- cgit From ec36df97c4ea3da4bc177a96050c54cf8f35ba25 Mon Sep 17 00:00:00 2001 From: unarist Date: Sun, 17 Sep 2017 04:33:52 +0900 Subject: Escape URL parts on formatting local status (#4975) --- app/lib/formatter.rb | 2 +- spec/lib/formatter_spec.rb | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'spec/lib') diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index d9f843f44..575830190 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -137,7 +137,7 @@ class Formatter suffix = url[prefix.length + 30..-1] cutoff = url[prefix.length..-1].length > 30 - "#{prefix}#{text}#{suffix}" + "#{encode(prefix)}#{encode(text)}#{encode(suffix)}" end def hashtag_html(tag) diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb index f9b7efac5..b714b317a 100644 --- a/spec/lib/formatter_spec.rb +++ b/spec/lib/formatter_spec.rb @@ -121,6 +121,22 @@ RSpec.describe Formatter do end end + context 'contains unsafe URL (XSS attack, visible part)' do + let(:text) { %q{http://example.com/bb} } + + it 'has escaped HTML' do + is_expected.to include '<del>b</del>' + end + end + + context 'contains unsafe URL (XSS attack, invisible part)' do + let(:text) { %q{http://example.com/blahblahblahblah/a} } + + it 'has escaped HTML' do + is_expected.to include '<script>alert("Hello")</script>' + end + end + context 'contains HTML (script tag)' do let(:text) { '' } -- cgit From 81cec35dbf1b348d23363559e3f4e6b1ec3415c5 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Tue, 19 Sep 2017 02:42:40 +0200 Subject: Custom emoji (#4988) * Custom emoji - In OStatus: `` - In ActivityPub: `{ type: "Emoji", name: ":coolcat:", href: "http://..." }` - In REST API: Status object includes `emojis` array (`shortcode`, `url`) - Domain blocks with reject media stop emojis - Emoji file up to 50KB - Web UI handles custom emojis - Static pages render custom emojis as `` tags Side effects: - Undo #4500 optimization, as I needed to modify it to restore shortcode handling in emojify() - Formatter#plaintext should now make sure stripped out line-breaks and paragraphs are replaced with newlines * Fix emoji at the start not being converted --- app/javascript/mastodon/emoji.js | 60 ++++++++++------ app/javascript/mastodon/reducers/statuses.js | 9 ++- app/lib/activitypub/activity/create.rb | 13 ++++ app/lib/formatter.rb | 54 +++++++++++++- app/lib/ostatus/activity/creation.rb | 20 ++++++ app/lib/ostatus/atom_serializer.rb | 4 ++ app/models/custom_emoji.rb | 38 ++++++++++ app/models/status.rb | 4 ++ app/serializers/activitypub/note_serializer.rb | 20 +++++- app/serializers/rest/status_serializer.rb | 11 +++ .../stream_entries/_detailed_status.html.haml | 2 +- app/views/stream_entries/_simple_status.html.haml | 2 +- db/migrate/20170917153509_create_custom_emojis.rb | 13 ++++ db/schema.rb | 14 +++- spec/fabricators/custom_emoji_fabricator.rb | 5 ++ spec/fixtures/files/emojo.png | Bin 0 -> 29814 bytes spec/lib/activitypub/activity/create_spec.rb | 25 +++++++ spec/lib/formatter_spec.rb | 78 +++++++++++++++++++++ spec/lib/ostatus/atom_serializer_spec.rb | 16 ++++- spec/models/custom_emoji_spec.rb | 25 +++++++ 20 files changed, 382 insertions(+), 31 deletions(-) create mode 100644 app/models/custom_emoji.rb create mode 100644 db/migrate/20170917153509_create_custom_emojis.rb create mode 100644 spec/fabricators/custom_emoji_fabricator.rb create mode 100644 spec/fixtures/files/emojo.png create mode 100644 spec/models/custom_emoji_spec.rb (limited to 'spec/lib') diff --git a/app/javascript/mastodon/emoji.js b/app/javascript/mastodon/emoji.js index a41dfdd1d..865b85b61 100644 --- a/app/javascript/mastodon/emoji.js +++ b/app/javascript/mastodon/emoji.js @@ -3,28 +3,48 @@ import Trie from 'substring-trie'; const trie = new Trie(Object.keys(unicodeMapping)); -const emojify = str => { - let rtn = ''; - for (;;) { - let match, i = 0; - while (i < str.length && str[i] !== '<' && !(match = trie.search(str.slice(i)))) { - i += str.codePointAt(i) < 65536 ? 1 : 2; - } - if (i === str.length) - break; - else if (str[i] === '<') { - let tagend = str.indexOf('>', i + 1) + 1; - if (!tagend) - break; - rtn += str.slice(0, tagend); - str = str.slice(tagend); - } else { - const [filename, shortCode] = unicodeMapping[match]; - rtn += str.slice(0, i) + `${match}`; - str = str.slice(i + match.length); +const emojify = (str, customEmojis = {}) => { + // This walks through the string from start to end, ignoring any tags (

,
, etc.) + // and replacing valid unicode strings + // that _aren't_ within tags with an version. + // The goal is to be the same as an emojione.regUnicode replacement, but faster. + let i = -1; + let insideTag = false; + let insideShortname = false; + let shortnameStartIndex = -1; + let match; + while (++i < str.length) { + const char = str.charAt(i); + if (insideShortname && char === ':') { + const shortname = str.substring(shortnameStartIndex, i + 1); + if (shortname in customEmojis) { + const replacement = `${shortname}`; + str = str.substring(0, shortnameStartIndex) + replacement + str.substring(i + 1); + i += (replacement.length - shortname.length - 1); // jump ahead the length we've added to the string + } else { + i--; + } + insideShortname = false; + } else if (insideTag && char === '>') { + insideTag = false; + } else if (char === '<') { + insideTag = true; + insideShortname = false; + } else if (!insideTag && char === ':') { + insideShortname = true; + shortnameStartIndex = i; + } else if (!insideTag && (match = trie.search(str.substring(i)))) { + const unicodeStr = match; + if (unicodeStr in unicodeMapping) { + const [filename, shortCode] = unicodeMapping[unicodeStr]; + const alt = unicodeStr; + const replacement = `${alt}`; + str = str.substring(0, i) + replacement + str.substring(i + unicodeStr.length); + i += (replacement.length - unicodeStr.length); // jump ahead the length we've added to the string + } } } - return rtn + str; + return str; }; export default emojify; diff --git a/app/javascript/mastodon/reducers/statuses.js b/app/javascript/mastodon/reducers/statuses.js index 7f906bef6..38b23504e 100644 --- a/app/javascript/mastodon/reducers/statuses.js +++ b/app/javascript/mastodon/reducers/statuses.js @@ -58,9 +58,14 @@ const normalizeStatus = (state, status) => { } const searchContent = [status.spoiler_text, status.content].join(' ').replace(/
/g, '\n').replace(/<\/p>

/g, '\n\n'); + const emojiMap = normalStatus.emojis.reduce((obj, emoji) => { + obj[`:${emoji.shortcode}:`] = emoji.url; + return obj; + }, {}); + normalStatus.search_index = domParser.parseFromString(searchContent, 'text/html').documentElement.textContent; - normalStatus.contentHtml = emojify(normalStatus.content); - normalStatus.spoilerHtml = emojify(escapeTextContentForBrowser(normalStatus.spoiler_text || '')); + normalStatus.contentHtml = emojify(normalStatus.content, emojiMap); + normalStatus.spoilerHtml = emojify(escapeTextContentForBrowser(normalStatus.spoiler_text || ''), emojiMap); return state.update(status.id, ImmutableMap(), map => map.mergeDeep(fromJS(normalStatus))); }; diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index 894759d9a..41f2b0bad 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -61,6 +61,8 @@ class ActivityPub::Activity::Create < ActivityPub::Activity process_hashtag tag, status when 'Mention' process_mention tag, status + when 'Emoji' + process_emoji tag, status end end end @@ -79,6 +81,17 @@ class ActivityPub::Activity::Create < ActivityPub::Activity account.mentions.create(status: status) end + def process_emoji(tag, _status) + shortcode = tag['name'].delete(':') + emoji = CustomEmoji.find_by(shortcode: shortcode, domain: @account.domain) + + return if !emoji.nil? || skip_download? + + emoji = CustomEmoji.new(domain: @account.domain, shortcode: shortcode) + emoji.image_remote_url = tag['href'] + emoji.save + end + def process_attachments(status) return unless @object['attachment'].is_a?(Array) diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index 575830190..29fea27de 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -9,7 +9,7 @@ class Formatter include ActionView::Helpers::TextHelper - def format(status) + def format(status, options = {}) if status.reblog? prepend_reblog = status.reblog.account.acct status = status.proper @@ -19,7 +19,11 @@ class Formatter raw_content = status.text - return reformat(raw_content) unless status.local? + unless status.local? + html = reformat(raw_content) + html = encode_custom_emojis(html, status.emojis) if options[:custom_emojify] + return html + end linkable_accounts = status.mentions.map(&:account) linkable_accounts << status.account @@ -27,6 +31,7 @@ class Formatter html = raw_content html = "RT @#{prepend_reblog} #{html}" if prepend_reblog html = encode_and_link_urls(html, linkable_accounts) + html = encode_custom_emojis(html, status.emojis) if options[:custom_emojify] html = simple_format(html, {}, sanitize: false) html = html.delete("\n") @@ -39,7 +44,9 @@ class Formatter def plaintext(status) return status.text if status.local? - strip_tags(status.text) + + text = status.text.gsub(/(
|
|<\/p>)+/) { |match| "#{match}\n" } + strip_tags(text) end def simplified_format(account) @@ -76,6 +83,47 @@ class Formatter end end + def encode_custom_emojis(html, emojis) + return html if emojis.empty? + + emoji_map = emojis.map { |e| [e.shortcode, full_asset_url(e.image.url)] }.to_h + + i = -1 + inside_tag = false + inside_shortname = false + shortname_start_index = -1 + + while i + 1 < html.size + i += 1 + + if inside_shortname && html[i] == ':' + shortcode = html[shortname_start_index + 1..i - 1] + emoji = emoji_map[shortcode] + + if emoji + replacement = "\":#{shortcode}:\"" + before_html = shortname_start_index.positive? ? html[0..shortname_start_index - 1] : '' + html = before_html + replacement + html[i + 1..-1] + i += replacement.size - (shortcode.size + 2) - 1 + else + i -= 1 + end + + inside_shortname = false + elsif inside_tag && html[i] == '>' + inside_tag = false + elsif html[i] == '<' + inside_tag = true + inside_shortname = false + elsif !inside_tag && html[i] == ':' + inside_shortname = true + shortname_start_index = i + end + end + + html + end + def rewrite(text, entities) chars = text.to_s.to_char_a diff --git a/app/lib/ostatus/activity/creation.rb b/app/lib/ostatus/activity/creation.rb index 1a23c9efa..d3f1629c4 100644 --- a/app/lib/ostatus/activity/creation.rb +++ b/app/lib/ostatus/activity/creation.rb @@ -42,6 +42,7 @@ class OStatus::Activity::Creation < OStatus::Activity::Base save_mentions(status) save_hashtags(status) save_media(status) + save_emojis(status) end if thread? && status.thread.nil? @@ -150,6 +151,25 @@ class OStatus::Activity::Creation < OStatus::Activity::Base end end + def save_emojis(parent) + do_not_download = DomainBlock.find_by(domain: parent.account.domain)&.reject_media? + + return if do_not_download + + @xml.xpath('./xmlns:link[@rel="emoji"]', xmlns: TagManager::XMLNS).each do |link| + next unless link['href'] && link['name'] + + shortcode = link['name'].delete(':') + emoji = CustomEmoji.find_by(shortcode: shortcode, domain: parent.account.domain) + + next unless emoji.nil? + + emoji = CustomEmoji.new(shortcode: shortcode, domain: parent.account.domain) + emoji.image_remote_url = link['href'] + emoji.save + end + end + def account_from_href(href) url = Addressable::URI.parse(href).normalize diff --git a/app/lib/ostatus/atom_serializer.rb b/app/lib/ostatus/atom_serializer.rb index b8e22a381..a6a5cb0c4 100644 --- a/app/lib/ostatus/atom_serializer.rb +++ b/app/lib/ostatus/atom_serializer.rb @@ -368,5 +368,9 @@ class OStatus::AtomSerializer end append_element(entry, 'mastodon:scope', status.visibility) + + status.emojis.each do |emoji| + append_element(entry, 'link', nil, rel: :emoji, href: full_asset_url(emoji.image.url), name: emoji.shortcode) + end end end diff --git a/app/models/custom_emoji.rb b/app/models/custom_emoji.rb new file mode 100644 index 000000000..f4d3b16a0 --- /dev/null +++ b/app/models/custom_emoji.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true +# == Schema Information +# +# Table name: custom_emojis +# +# id :integer not null, primary key +# shortcode :string default(""), not null +# domain :string +# image_file_name :string +# image_content_type :string +# image_file_size :integer +# image_updated_at :datetime +# created_at :datetime not null +# updated_at :datetime not null +# + +class CustomEmoji < ApplicationRecord + SHORTCODE_RE_FRAGMENT = '[a-zA-Z0-9_]{2,}' + + SCAN_RE = /(?<=[^[:alnum:]:]|\n|^) + :(#{SHORTCODE_RE_FRAGMENT}): + (?=[^[:alnum:]:]|$)/x + + has_attached_file :image + + validates_attachment :image, content_type: { content_type: 'image/png' }, presence: true, size: { in: 0..50.kilobytes } + validates :shortcode, uniqueness: { scope: :domain }, format: { with: /\A#{SHORTCODE_RE_FRAGMENT}\z/ }, length: { minimum: 2 } + + include Remotable + + class << self + def from_text(text, domain) + return [] if text.blank? + shortcodes = text.scan(SCAN_RE).map(&:first) + where(shortcode: shortcodes, domain: domain) + end + end +end diff --git a/app/models/status.rb b/app/models/status.rb index 2a2cdcf6e..326d128d6 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -131,6 +131,10 @@ class Status < ApplicationRecord !sensitive? && media_attachments.any? end + def emojis + CustomEmoji.from_text(text, account.domain) + end + after_create :store_uri, if: :local? before_validation :prepare_contents, if: :local? diff --git a/app/serializers/activitypub/note_serializer.rb b/app/serializers/activitypub/note_serializer.rb index 166214eee..e5d8e3f03 100644 --- a/app/serializers/activitypub/note_serializer.rb +++ b/app/serializers/activitypub/note_serializer.rb @@ -57,7 +57,7 @@ class ActivityPub::NoteSerializer < ActiveModel::Serializer end def virtual_tags - object.mentions + object.tags + object.mentions + object.tags + object.emojis end def atom_uri @@ -137,4 +137,22 @@ class ActivityPub::NoteSerializer < ActiveModel::Serializer "##{object.name}" end end + + class CustomEmojiSerializer < ActiveModel::Serializer + include RoutingHelper + + attributes :type, :href, :name + + def type + 'Emoji' + end + + def href + full_asset_url(object.image.url) + end + + def name + ":#{object.shortcode}:" + end + end end diff --git a/app/serializers/rest/status_serializer.rb b/app/serializers/rest/status_serializer.rb index 298a3bb40..d8efa8e60 100644 --- a/app/serializers/rest/status_serializer.rb +++ b/app/serializers/rest/status_serializer.rb @@ -17,6 +17,7 @@ class REST::StatusSerializer < ActiveModel::Serializer has_many :media_attachments, serializer: REST::MediaAttachmentSerializer has_many :mentions has_many :tags + has_many :emojis def current_user? !current_user.nil? @@ -106,4 +107,14 @@ class REST::StatusSerializer < ActiveModel::Serializer tag_url(object) end end + + class CustomEmojiSerializer < ActiveModel::Serializer + include RoutingHelper + + attributes :shortcode, :url + + def url + full_asset_url(object.image.url) + end + end end diff --git a/app/views/stream_entries/_detailed_status.html.haml b/app/views/stream_entries/_detailed_status.html.haml index dd9456260..692d5a6d5 100644 --- a/app/views/stream_entries/_detailed_status.html.haml +++ b/app/views/stream_entries/_detailed_status.html.haml @@ -17,7 +17,7 @@ %p{ style: 'margin-bottom: 0' }< %span.p-summary> #{status.spoiler_text}  %a.status__content__spoiler-link{ href: '#' }= t('statuses.show_more') - .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl_status?(status) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status) + .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl_status?(status) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status, custom_emojify: true) - if !status.media_attachments.empty? - if status.media_attachments.first.video? diff --git a/app/views/stream_entries/_simple_status.html.haml b/app/views/stream_entries/_simple_status.html.haml index 55aa97f32..f9a530d38 100644 --- a/app/views/stream_entries/_simple_status.html.haml +++ b/app/views/stream_entries/_simple_status.html.haml @@ -18,7 +18,7 @@ %p{ style: 'margin-bottom: 0' }< %span.p-summary> #{status.spoiler_text}  %a.status__content__spoiler-link{ href: '#' }= t('statuses.show_more') - .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl_status?(status) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status) + .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl_status?(status) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status, custom_emojify: true) - unless status.media_attachments.empty? - if status.media_attachments.first.video? diff --git a/db/migrate/20170917153509_create_custom_emojis.rb b/db/migrate/20170917153509_create_custom_emojis.rb new file mode 100644 index 000000000..4040c8312 --- /dev/null +++ b/db/migrate/20170917153509_create_custom_emojis.rb @@ -0,0 +1,13 @@ +class CreateCustomEmojis < ActiveRecord::Migration[5.1] + def change + create_table :custom_emojis do |t| + t.string :shortcode, null: false, default: '' + t.string :domain + t.attachment :image + + t.timestamps + end + + add_index :custom_emojis, [:shortcode, :domain], unique: true + end +end diff --git a/db/schema.rb b/db/schema.rb index f2ca2af69..9f42d46dd 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170913000752) do +ActiveRecord::Schema.define(version: 20170917153509) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -89,6 +89,18 @@ ActiveRecord::Schema.define(version: 20170913000752) do t.index ["uri"], name: "index_conversations_on_uri", unique: true end + create_table "custom_emojis", force: :cascade do |t| + t.string "shortcode", default: "", null: false + t.string "domain" + t.string "image_file_name" + t.string "image_content_type" + t.integer "image_file_size" + t.datetime "image_updated_at" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["shortcode", "domain"], name: "index_custom_emojis_on_shortcode_and_domain", unique: true + end + create_table "domain_blocks", id: :serial, force: :cascade do |t| t.string "domain", default: "", null: false t.datetime "created_at", null: false diff --git a/spec/fabricators/custom_emoji_fabricator.rb b/spec/fabricators/custom_emoji_fabricator.rb new file mode 100644 index 000000000..18a7d23dc --- /dev/null +++ b/spec/fabricators/custom_emoji_fabricator.rb @@ -0,0 +1,5 @@ +Fabricator(:custom_emoji) do + shortcode 'coolcat' + domain nil + image { File.open(Rails.root.join('spec', 'fixtures', 'files', 'emojo.png')) } +end diff --git a/spec/fixtures/files/emojo.png b/spec/fixtures/files/emojo.png new file mode 100644 index 000000000..cb5993499 Binary files /dev/null and b/spec/fixtures/files/emojo.png differ diff --git a/spec/lib/activitypub/activity/create_spec.rb b/spec/lib/activitypub/activity/create_spec.rb index fcb044ebc..1a9520f04 100644 --- a/spec/lib/activitypub/activity/create_spec.rb +++ b/spec/lib/activitypub/activity/create_spec.rb @@ -17,6 +17,7 @@ RSpec.describe ActivityPub::Activity::Create do before do stub_request(:get, 'http://example.com/attachment.png').to_return(request_fixture('avatar.txt')) + stub_request(:get, 'http://example.com/emoji.png').to_return(body: attachment_fixture('emojo.png')) end describe '#perform' do @@ -217,5 +218,29 @@ RSpec.describe ActivityPub::Activity::Create do expect(status.tags.map(&:name)).to include('test') end end + + context 'with emojis' do + let(:object_json) do + { + id: 'bar', + type: 'Note', + content: 'Lorem ipsum :tinking:', + tag: [ + { + type: 'Emoji', + href: 'http://example.com/emoji.png', + name: 'tinking', + }, + ], + } + end + + it 'creates status' do + status = sender.statuses.first + + expect(status).to_not be_nil + expect(status.emojis.map(&:shortcode)).to include('tinking') + end + end end end diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb index b714b317a..71b6b78d2 100644 --- a/spec/lib/formatter_spec.rb +++ b/spec/lib/formatter_spec.rb @@ -223,6 +223,45 @@ RSpec.describe Formatter do include_examples 'encode and link URLs' end + + context 'with custom_emojify option' do + let!(:emoji) { Fabricate(:custom_emoji) } + let(:status) { Fabricate(:status, account: local_account, text: text) } + + subject { Formatter.instance.format(status, custom_emojify: true) } + + context 'with emoji at the start' do + let(:text) { ':coolcat: Beep boop' } + + it 'converts shortcode to image tag' do + is_expected.to match(/

:coolcat::coolcat: Beep boop
' } + + it 'converts shortcode to image tag' do + is_expected.to match(/

:coolcat:Beep :coolcat: boop

' } + + it 'converts shortcode to image tag' do + is_expected.to match(/Beep :coolcat::coolcat::coolcat:

' } + + it 'does not touch the shortcodes' do + is_expected.to match(/

:coolcat::coolcat:<\/p>/) + end + end + + context 'with emoji at the end' do + let(:text) { '

Beep boop
:coolcat:

' } + + it 'converts shortcode to image tag' do + is_expected.to match(/
:coolcat:Hello :coolcat:

' } + + it 'returns records used via shortcodes in text' do + is_expected.to include(emojo) + end + end + end +end -- cgit