From f1f6ddd5362f40e287857750f5e102206bd0e169 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Mon, 7 Feb 2022 18:16:31 +0100 Subject: Fix structured data parsing from links choking on bad data (#17403) * Fix structured data parsing from links choking on bad data - Fix og:url meta tag being prioritized over canonical link tag - Fix structured data parsing choking on commented-out CDATA declarations - Fix HTML entities in title, description, provider_name, author_name - Change structured data parsing to attempt every JSON-LD script tag * Remove unnecessary slash escapes from CDATA regex pattern --- app/lib/link_details_extractor.rb | 53 ++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 9 deletions(-) (limited to 'app/lib') diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb index 56ad0717b..d2bcf0c25 100644 --- a/app/lib/link_details_extractor.rb +++ b/app/lib/link_details_extractor.rb @@ -3,6 +3,19 @@ class LinkDetailsExtractor include ActionView::Helpers::TagHelper + # Some publications wrap their JSON-LD data in their