about summary refs log tree commit diff
diff options
context:
space:
mode:
author Eugen <eugen@zeonfederated.com> 2017-04-16 20:32:17 +0200
committer GitHub <noreply@github.com> 2017-04-16 20:32:17 +0200
commit e4af4898de8ab962bf39ced5d31d88e3fd510538 (patch)
tree 7853ee0cf1ee9c2bd0cf22802af9adcf0efed547
parent 6d70a8026309c2d41e2402ec84875ced8e181105 (diff)
Add language detection (#1772)
* Add language detection via WhatLanguage and (de)serialization of it through Atom

* Fix default language in ProcessFeedService

* Re-add newline before 'react-rails' Gem to fix groupings

Fixes Code Climate issue
-rw-r--r-- Gemfile 1
-rw-r--r-- Gemfile.lock 2
-rw-r--r-- app/lib/atom_serializer.rb 4
-rw-r--r-- app/services/post_status_service.rb 5
-rw-r--r-- app/services/process_feed_service.rb 5
-rw-r--r-- db/migrate/20170414132105_add_language_to_statuses.rb 5
-rw-r--r-- db/schema.rb 5
7 files changed, 23 insertions, 4 deletions
diff --git a/Gemfile b/Gemfile
index 12e783805..2971a7a29 100644
--- a/Gemfile
+++ b/Gemfile
@@ -56,6 +56,7 @@ gem 'sprockets-rails', :require => 'sprockets/railtie'
 gem 'statsd-instrument'
 gem 'twitter-text'
 gem 'tzinfo-data'
+gem 'whatlanguage'
 
 gem 'react-rails'
 gem 'browserify-rails'
diff --git a/Gemfile.lock b/Gemfile.lock
index 97e5a6658..b0ef1c768 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -455,6 +455,7 @@ GEM
     websocket-driver (0.6.5)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.2)
+    whatlanguage (1.0.6)
     xpath (2.0.0)
       nokogiri (~> 1.3)
 
@@ -541,6 +542,7 @@ DEPENDENCIES
   tzinfo-data
   uglifier (>= 1.3.0)
   webmock
+  whatlanguage
 
 RUBY VERSION
    ruby 2.4.1p111
diff --git a/app/lib/atom_serializer.rb b/app/lib/atom_serializer.rb
index 180b9bb82..6f1910440 100644
--- a/app/lib/atom_serializer.rb
+++ b/app/lib/atom_serializer.rb
@@ -327,8 +327,8 @@ class AtomSerializer
   end
 
   def serialize_status_attributes(entry, status)
-    append_element(entry, 'summary', status.spoiler_text) if status.spoiler_text?
-    append_element(entry, 'content', Formatter.instance.format(status.proper).to_str, type: 'html')
+    append_element(entry, 'summary', status.spoiler_text, 'xml:lang': status.language) if status.spoiler_text?
+    append_element(entry, 'content', Formatter.instance.format(status.proper).to_str, type: 'html', 'xml:lang': status.language)
 
     status.mentions.each do |mentioned|
       append_element(entry, 'link', nil, rel: :mentioned, 'ostatus:object-type': TagManager::TYPES[:person], href: TagManager.instance.uri_for(mentioned.account))
diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb
index 221aa42a3..d8202bea8 100644
--- a/app/services/post_status_service.rb
+++ b/app/services/post_status_service.rb
@@ -19,6 +19,7 @@ class PostStatusService < BaseService
                                       sensitive: options[:sensitive],
                                       spoiler_text: options[:spoiler_text] || '',
                                       visibility: options[:visibility],
+                                      language: detect_language(text),
                                       application: options[:application])
 
     attach_media(status, media)
@@ -51,6 +52,10 @@ class PostStatusService < BaseService
     media.update(status_id: status.id)
   end
 
+  def detect_language(text)
+    WhatLanguage.new(:all).language_iso(text)
+  end
+
   def process_mentions_service
     @process_mentions_service ||= ProcessMentionsService.new
   end
diff --git a/app/services/process_feed_service.rb b/app/services/process_feed_service.rb
index 64a531e74..98d92f630 100644
--- a/app/services/process_feed_service.rb
+++ b/app/services/process_feed_service.rb
@@ -119,6 +119,7 @@ class ProcessFeedService < BaseService
         spoiler_text: content_warning(entry),
         created_at: published(entry),
         reply: thread?(entry),
+        language: content_language(entry),
         visibility: visibility_scope(entry)
       )
 
@@ -238,6 +239,10 @@ class ProcessFeedService < BaseService
       xml.at_xpath('./xmlns:content', xmlns: TagManager::XMLNS).content
     end
 
+    def content_language(xml = @xml)
+      xml.at_xpath('./xmlns:content', xmlns: TagManager::XMLNS)['xml:lang']&.presence || 'en'
+    end
+
     def content_warning(xml = @xml)
       xml.at_xpath('./xmlns:summary', xmlns: TagManager::XMLNS)&.content || ''
     end
diff --git a/db/migrate/20170414132105_add_language_to_statuses.rb b/db/migrate/20170414132105_add_language_to_statuses.rb
new file mode 100644
index 000000000..59d51cb86
--- /dev/null
+++ b/db/migrate/20170414132105_add_language_to_statuses.rb
@@ -0,0 +1,5 @@
+class AddLanguageToStatuses < ActiveRecord::Migration[5.0]
+  def change
+    add_column :statuses, :language, :string, null: false, default: 'en'
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 5f995ebda..62ff4207d 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 20170414080609) do
+ActiveRecord::Schema.define(version: 20170414132105) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
@@ -40,7 +40,6 @@ ActiveRecord::Schema.define(version: 20170414080609) do
     t.datetime "header_updated_at"
     t.string   "avatar_remote_url"
     t.datetime "subscription_expires_at"
-    t.datetime "last_webfingered_at"
     t.boolean  "silenced",                default: false, null: false
     t.boolean  "suspended",               default: false, null: false
     t.boolean  "locked",                  default: false, null: false
@@ -48,6 +47,7 @@ ActiveRecord::Schema.define(version: 20170414080609) do
     t.integer  "statuses_count",          default: 0,     null: false
     t.integer  "followers_count",         default: 0,     null: false
     t.integer  "following_count",         default: 0,     null: false
+    t.datetime "last_webfingered_at"
     t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin
     t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", using: :btree
     t.index ["url"], name: "index_accounts_on_url", using: :btree
@@ -244,6 +244,7 @@ ActiveRecord::Schema.define(version: 20170414080609) do
     t.boolean  "reply",                  default: false
     t.integer  "favourites_count",       default: 0,     null: false
     t.integer  "reblogs_count",          default: 0,     null: false
+    t.string   "language",               default: "en",  null: false
     t.index ["account_id"], name: "index_statuses_on_account_id", using: :btree
     t.index ["in_reply_to_id"], name: "index_statuses_on_in_reply_to_id", using: :btree
     t.index ["reblog_of_id"], name: "index_statuses_on_reblog_of_id", using: :btree