1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
# frozen_string_literal: true
require 'rails_helper'
# Specs for LanguageDetector: the text clean-up performed by `prepare_text`
# and the language returned by `detect` for a piece of text and an account.
describe LanguageDetector do
  describe 'prepare_text' do
    # `prepare_text` is reached through `send`; presumably it is not part of
    # the public interface of the detector.
    def prepare_text(text)
      described_class.instance.send(:prepare_text, text)
    end

    it 'returns unmodified string without special cases' do
      string = 'just a regular string'

      expect(prepare_text(string)).to eq string
    end

    it 'collapses spacing in strings' do
      # The input has to contain irregular whitespace; with already-normalised
      # input this example would pass even if nothing were collapsed.
      string = "The formatting   in \t this is very odd"

      expect(prepare_text(string)).to eq 'The formatting in this is very odd'
    end

    it 'strips usernames from strings before detection' do
      string = '@username Yeah, very surreal...! also @friend'

      expect(prepare_text(string)).to eq 'Yeah, very surreal...! also'
    end

    it 'strips URLs from strings before detection' do
      string = 'Our website is https://example.com and also http://localhost.dev'

      expect(prepare_text(string)).to eq 'Our website is and also'
    end

    it 'strips #hashtags from strings before detection' do
      string = 'Hey look at all the #animals and #fish'

      expect(prepare_text(string)).to eq 'Hey look at all the and'
    end
  end

  describe 'detect' do
    let(:account_without_user_locale) { Fabricate(:user, locale: nil).account }
    let(:account_remote) { Fabricate(:account, domain: 'joinmastodon.org') }

    # Runs detection for +text+ on behalf of +account+.
    def detect_language(text, account)
      described_class.instance.detect(text, account)
    end

    it 'detects english language for basic strings' do
      strings = [
        'Hello and welcome to mastodon how are you today?',
        "I'd rather not!",
        "a lot of people just want to feel righteous all the time and that's all that matters",
      ]

      strings.each do |string|
        result = detect_language(string, account_without_user_locale)

        # The string is passed as the failure message so a failing sample can
        # be identified.
        expect(result).to eq(:en), string
      end
    end

    it 'detects spanish language' do
      string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon'

      expect(detect_language(string, account_without_user_locale)).to eq :es
    end

    context "when language can't be detected" do
      it 'uses nil when sent an empty document' do
        expect(detect_language('', account_without_user_locale)).to be_nil
      end

      context 'because of a URL' do
        it 'uses nil when sent just a URL' do
          string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
          cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)

          # NOTE(review): this compares the return value of `find_language`
          # directly with a symbol. If that return value is a result object
          # rather than a bare symbol, the expectation can never fail --
          # confirm, and compare the detected language itself if so.
          expect(cld_result).not_to eq :en

          expect(detect_language(string, account_without_user_locale)).to be_nil
        end
      end

      context 'with an account' do
        # NOTE(review): the description says the account locale is used, but
        # the expectation is nil for an account whose user_locale is 'fr'.
        # One of the two looks out of date -- confirm the intended behaviour.
        it 'uses the account locale when present' do
          account = double(user_locale: 'fr')

          expect(detect_language('', account)).to be_nil
        end

        # Same call and expectation as 'uses nil when sent an empty document'.
        it 'uses nil when account is present but has no locale' do
          expect(detect_language('', account_without_user_locale)).to be_nil
        end
      end

      context 'with an `en` default locale' do
        # Same call and expectation as 'uses nil when sent an empty document';
        # this group does not set the default locale itself.
        it 'uses nil for undetectable string' do
          expect(detect_language('', account_without_user_locale)).to be_nil
        end
      end

      context 'remote user' do
        it 'detects Korean language' do
          string = '안녕하세요'

          expect(detect_language(string, account_remote)).to eq :ko
        end
      end

      context 'with a non-`en` default locale' do
        around do |example|
          original_locale = I18n.default_locale
          I18n.default_locale = :ja

          begin
            example.run
          ensure
            # Always restore the global default so other examples are not
            # affected, even if running the example raises.
            I18n.default_locale = original_locale
          end
        end

        it 'uses nil for undetectable string' do
          expect(detect_language('', account_without_user_locale)).to be_nil
        end
      end
    end
  end
end
|