From 007ab330e6ffb1e07995d4e306473d457043e2eb Mon Sep 17 00:00:00 2001 From: nullkal Date: Sun, 9 Jul 2017 05:44:31 +0900 Subject: Use charlock_holmes instead of nkf at FetchLinkCardService (#4080) * Specs for language detection * Use CharlockHolmes instead of NKF * Correct mistakes * Correct style * Set hint_enc instead of falling back and strip_tags * Improve specs * Add dependencies --- spec/fixtures/requests/koi8-r.txt | 20 ++++++++++++++++++++ spec/fixtures/requests/sjis.txt | 4 ++-- spec/fixtures/requests/sjis_with_wrong_charset.txt | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 spec/fixtures/requests/koi8-r.txt create mode 100644 spec/fixtures/requests/sjis_with_wrong_charset.txt (limited to 'spec/fixtures/requests') diff --git a/spec/fixtures/requests/koi8-r.txt b/spec/fixtures/requests/koi8-r.txt new file mode 100644 index 000000000..d4242af01 --- /dev/null +++ b/spec/fixtures/requests/koi8-r.txt @@ -0,0 +1,20 @@ +HTTP/1.1 200 OK +Server: nginx/1.11.10 +Date: Tue, 04 Jul 2017 16:43:39 GMT +Content-Type: text/html +Content-Length: 273 +Connection: keep-alive +Last-Modified: Tue, 04 Jul 2017 16:41:34 GMT +Accept-Ranges: bytes + + + + + + íÏÓËÏ×Ñ ÎÁÞÉÎÁÅÔß ÔÏÌØËÏ ×ß XVI ÓÔ. ÐÒÉ×ÌÅËÁÔØ ×ÎÉÍÁÎÅ ÉÎÏÓÔÒÁÎÃÅ×ß. + + +

íÏÓËÏ×Ñ ÎÁÞÉÎÁÅÔß ÔÏÌØËÏ ×ß XVI ÓÔ. ÐÒÉ×ÌÅËÁÔØ ×ÎÉÍÁÎÅ ÉÎÏÓÔÒÁÎÃÅ×ß.
+

+ + diff --git a/spec/fixtures/requests/sjis.txt b/spec/fixtures/requests/sjis.txt index 9041aa25d..faf18d35c 100644 --- a/spec/fixtures/requests/sjis.txt +++ b/spec/fixtures/requests/sjis.txt @@ -11,10 +11,10 @@ Accept-Ranges: bytes - JSIS‚̃y[ƒW + SJIS‚̃y[ƒW -

SJIS‚̃y[ƒW
+

Ž„‚à“¯”N‚Ü‚µ‚Ä‚¢‚í‚ä‚é‹L”Ol‚Á‚Ä‚à‚Ì‚ÌŽž‚Å‚µ‚ ‚è‚Å‚·B‚à‚µŽžŠÔ‚ɈӖ¡ŽÒ‚ͳ‚µ‚­‚Ç‚ñ‚È”­‰ï‚Ü‚¹‚¾‚Ü‚Å‚ª\‚µã‚°‚ª‚¢‚ç‚Á‚µ‚á‚邽‚É‚ÍŽQl‹A‚邽‚¢‚¾‚©‚çA­‚µ‚É‚à‚â‚Á‚ ‚Á‚Ü‚µ‚È‚½B‹à‚©‚ç‚¢‚¤‚È‚¢‚Ì‚Í‚Ç‚¤‚à‹ãŒŽ‚ð‚Å‚«‚邾‚¯‚½‚½‚­‚½B‚¯‚Á‚µ‚ĉª“c‚³‚ñ‚É”½RK­‚µ’¥‚ɉ]‚¨‚Å‚µ‚å‹à—Í‚±‚¤‚µ‚½Œ —Í‚ ‚È‚½‚©Žw}‚ª‚Æ‚¢‚¤‚¨o“ü‚è‚È‚­‚¾‚ë‚È‚ ‚è‚ÄA‚»‚ÌÌ‚ÍŽ„‚©‹à—͉A‚ð“{‚ç‚©‚çA‹vŒ´‚³‚ñ‚Ì‚à‚Ì‚ð‚ª‚½‚Ì‚¢‚‚ª‚µ‚©‚é‚É‚²Šó–]‚ÆŒü‚¢‚΂»‚êman‚É‚²–µ‚‚ÖŽQ‚è‚悤‚É“¯Žž‚É‚²‰‰à‚ª‚µ‚Å‚È‚ç‚Ì‚ÅA‘½•ª‚à‚µ•\— ‚É•Ï‚Á‚½‚Ä‚­‚ê‚Å‚·Ž–‚Ål‚¦‚½‚½B‚µ‚©‚à—Ⴆ‚΂²‚ª‚½‚ª‚Æ‚Ç‚Ü‚ç‚à‚Ì‚àŽÀÛ‚Þ‚â‚Ý‚Æ‚ ‚è‚Å‚·‚ÄA‚±‚ÌŽ©•ª‚Å‚Í\‚µ‚ñ‚Ä‚Æ‚µ‚Ä¢ŠÔ‚É•À‚ׂ̂És‚©‚È‚©‚Á‚ÈB


diff --git a/spec/fixtures/requests/sjis_with_wrong_charset.txt b/spec/fixtures/requests/sjis_with_wrong_charset.txt new file mode 100644 index 000000000..456750c6b --- /dev/null +++ b/spec/fixtures/requests/sjis_with_wrong_charset.txt @@ -0,0 +1,20 @@ +HTTP/1.1 200 OK +Server: nginx/1.11.10 +Date: Tue, 04 Jul 2017 16:43:39 GMT +Content-Type: text/html; charset=utf-8 +Content-Length: 273 +Connection: keep-alive +Last-Modified: Tue, 04 Jul 2017 16:41:34 GMT +Accept-Ranges: bytes + + + + + + SJIS‚̃y[ƒW + + +

Ž„‚à“¯”N‚Ü‚µ‚Ä‚¢‚í‚ä‚é‹L”Ol‚Á‚Ä‚à‚Ì‚ÌŽž‚Å‚µ‚ ‚è‚Å‚·B‚à‚µŽžŠÔ‚ɈӖ¡ŽÒ‚ͳ‚µ‚­‚Ç‚ñ‚È”­‰ï‚Ü‚¹‚¾‚Ü‚Å‚ª\‚µã‚°‚ª‚¢‚ç‚Á‚µ‚á‚邽‚É‚ÍŽQl‹A‚邽‚¢‚¾‚©‚çA­‚µ‚É‚à‚â‚Á‚ ‚Á‚Ü‚µ‚È‚½B‹à‚©‚ç‚¢‚¤‚È‚¢‚Ì‚Í‚Ç‚¤‚à‹ãŒŽ‚ð‚Å‚«‚邾‚¯‚½‚½‚­‚½B‚¯‚Á‚µ‚ĉª“c‚³‚ñ‚É”½RK­‚µ’¥‚ɉ]‚¨‚Å‚µ‚å‹à—Í‚±‚¤‚µ‚½Œ —Í‚ ‚È‚½‚©Žw}‚ª‚Æ‚¢‚¤‚¨o“ü‚è‚È‚­‚¾‚ë‚È‚ ‚è‚ÄA‚»‚ÌÌ‚ÍŽ„‚©‹à—͉A‚ð“{‚ç‚©‚çA‹vŒ´‚³‚ñ‚Ì‚à‚Ì‚ð‚ª‚½‚Ì‚¢‚‚ª‚µ‚©‚é‚É‚²Šó–]‚ÆŒü‚¢‚΂»‚êman‚É‚²–µ‚‚ÖŽQ‚è‚悤‚É“¯Žž‚É‚²‰‰à‚ª‚µ‚Å‚È‚ç‚Ì‚ÅA‘½•ª‚à‚µ•\— ‚É•Ï‚Á‚½‚Ä‚­‚ê‚Å‚·Ž–‚Ål‚¦‚½‚½B‚µ‚©‚à—Ⴆ‚΂²‚ª‚½‚ª‚Æ‚Ç‚Ü‚ç‚à‚Ì‚àŽÀÛ‚Þ‚â‚Ý‚Æ‚ ‚è‚Å‚·‚ÄA‚±‚ÌŽ©•ª‚Å‚Í\‚µ‚ñ‚Ä‚Æ‚µ‚Ä¢ŠÔ‚É•À‚ׂ̂És‚©‚È‚©‚Á‚ÈB
+

+ + -- cgit