about summary refs log tree commit diff
path: root/spec/lib/language_detector_spec.rb
blob: b7ba0f6c4f4ac9e924176b40991d8f73e7739668 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# frozen_string_literal: true

require 'rails_helper'

describe LanguageDetector do
  # Checks the clean-up that `prepare_text` applies to a string before
  # detection. The method is invoked with `send`, so it is presumably not
  # part of the public interface.
  describe 'prepare_text' do
    # Runs `prepare_text` on the detector's shared instance and returns the
    # cleaned string.
    def prepared(text)
      described_class.instance.send(:prepare_text, text)
    end

    it 'returns unmodified string without special cases' do
      plain = 'just a regular string'

      expect(prepared(plain)).to eq plain
    end

    it 'collapses spacing in strings' do
      cleaned = prepared('The formatting   in    this is very        odd')

      expect(cleaned).to eq 'The formatting in this is very odd'
    end

    it 'strips usernames from strings before detection' do
      cleaned = prepared('@username Yeah, very surreal...! also @friend')

      expect(cleaned).to eq 'Yeah, very surreal...! also'
    end

    it 'strips URLs from strings before detection' do
      cleaned = prepared('Our website is https://example.com and also http://localhost.dev')

      expect(cleaned).to eq 'Our website is and also'
    end

    it 'converts #hashtags back to normal text before detection' do
      cleaned = prepared('Hey look at all the #animals and #FishAndChips')

      expect(cleaned).to eq 'Hey look at all the animals and fish and chips'
    end
  end

  # Exercises `detect(text, account)` on the detector's shared instance:
  # successful detection for English, Spanish and Korean samples, and the nil
  # result expected whenever the text offers nothing to detect.
  describe 'detect' do
    # Account of a user fabricated without a locale.
    let(:account_without_user_locale) { Fabricate(:user, locale: nil).account }
    # Account fabricated with a domain, standing in for a remote user.
    let(:account_remote) { Fabricate(:account, domain: 'joinmastodon.org') }

    it 'detects english language for basic strings' do
      strings = [
        'Hello and welcome to mastodon how are you today?',
        "I'd rather not!",
        "a lot of people just want to feel righteous all the time and that's all that matters",
      ]

      strings.each do |string|
        result = described_class.instance.detect(string, account_without_user_locale)

        # The sample is passed as the failure message so a failing string is identifiable.
        expect(result).to eq(:en), string
      end
    end

    it 'detects spanish language' do
      string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon'
      result = described_class.instance.detect(string, account_without_user_locale)

      expect(result).to eq :es
    end

    describe 'when language can\'t be detected' do
      it 'uses nil when sent an empty document' do
        result = described_class.instance.detect('', account_without_user_locale)

        expect(result).to be_nil
      end

      describe 'because of a URL' do
        it 'uses nil when sent just a URL' do
          string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'

          # NOTE(review): `find_language` appears to return a result object
          # rather than a language symbol (confirm against the cld3 docs). If
          # so, this comparison with :en can never fail and the sanity check
          # is vacuous; comparing the result's language would be the fix.
          cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)
          expect(cld_result).not_to eq :en

          result = described_class.instance.detect(string, account_without_user_locale)

          expect(result).to be_nil
        end
      end

      describe 'with an account' do
        # The account reports a locale, yet the expected result is still nil:
        # the locale is not used as a fallback for undetectable text.
        it 'uses nil even when the account locale is present' do
          account = double(user_locale: 'fr')
          result  = described_class.instance.detect('', account)

          expect(result).to be_nil
        end

        it 'uses nil when account is present but has no locale' do
          result = described_class.instance.detect('', account_without_user_locale)

          expect(result).to be_nil
        end
      end

      describe 'with an `en` default locale' do
        it 'uses nil for undetectable string' do
          result = described_class.instance.detect('', account_without_user_locale)

          expect(result).to be_nil
        end
      end

      describe 'remote user' do
        it 'detects Korean language' do
          string = '안녕하세요'
          result = described_class.instance.detect(string, account_remote)

          expect(result).to eq :ko
        end
      end

      describe 'with a non-`en` default locale' do
        # Temporarily switches the global default locale to :ja. The previous
        # value is restored in `ensure` so the global setting cannot leak into
        # other examples even if running the example raises.
        around(:each) do |example|
          original_locale = I18n.default_locale
          I18n.default_locale = :ja
          begin
            example.run
          ensure
            I18n.default_locale = original_locale
          end
        end

        it 'uses nil for undetectable string' do
          string = ''
          result = described_class.instance.detect(string, account_without_user_locale)

          expect(result).to be_nil
        end
      end
    end
  end
end