From b18caff5b6309fe9fad42dac6b7342b246223688 Mon Sep 17 00:00:00 2001
From: Jonathan de Jong <jonathandejong02@gmail.com>
Date: Wed, 22 Jan 2025 15:02:05 +0100
Subject: [PATCH] Change `first_of_value` to `first_of_hash`, and add more test
 cases (#33647)

---
 app/lib/link_details_extractor.rb       | 10 +++----
 spec/lib/link_details_extractor_spec.rb | 38 +++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb
index fe7f23f481..a8004f2925 100644
--- a/app/lib/link_details_extractor.rb
+++ b/app/lib/link_details_extractor.rb
@@ -46,7 +46,7 @@ class LinkDetailsExtractor
     end
 
     def image
-      obj = first_of_value(json['image'])
+      obj = first_of_hash(json['image'])
 
       return obj['url'] if obj.is_a?(Hash)
 
@@ -85,15 +85,15 @@ class LinkDetailsExtractor
     private
 
     def author
-      first_of_value(json['author']) || {}
+      first_of_hash(json['author']) || {}
     end
 
     def publisher
-      first_of_value(json['publisher']) || {}
+      first_of_hash(json['publisher']) || {}
     end
 
-    def first_of_value(arr)
-      arr.is_a?(Array) ? arr.first : arr
+    def first_of_hash(arr)
+      arr.is_a?(Array) ? arr.flatten.find { |item| item.is_a?(Hash) } : arr
     end
 
     def root_array(root)
diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb
index 36d6f22b00..cb072c4870 100644
--- a/spec/lib/link_details_extractor_spec.rb
+++ b/spec/lib/link_details_extractor_spec.rb
@@ -249,6 +249,44 @@ RSpec.describe LinkDetailsExtractor do
         expect(subject.author_name).to eq 'Author 1, Author 2'
       end
     end
+
+    context 'with embedded arrays' do
+      let(:ld_json) do
+        {
+          '@context' => 'https://schema.org',
+          '@type' => 'NewsArticle',
+          'headline' => 'A lot of authors',
+          'description' => 'But we decided to cram them into one',
+          'author' => [[{
+            '@type' => 'Person',
+            'name' => ['Author 1'],
+          }]],
+          'publisher' => [[{
+            '@type' => 'NewsMediaOrganization',
+            'name' => 'Pet News',
+            'url' => 'https://example.com',
+          }]],
+        }.to_json
+      end
+      let(:html) { <<~HTML }
+        <!doctype html>
+        <html>
+        <body>
+          <script type="application/ld+json">
+            #{ld_json}
+          </script>
+        </body>
+        </html>
+      HTML
+
+      it 'gives correct author_name' do
+        expect(subject.author_name).to eq 'Author 1'
+      end
+
+      it 'gives provider_name' do
+        expect(subject.provider_name).to eq 'Pet News'
+      end
+    end
   end
 
   context 'when Open Graph protocol data is present' do