79 lines
2.6 KiB
Ruby
79 lines
2.6 KiB
Ruby
require "active_support/all"
|
|
require 'nokogiri'
|
|
require 'open-uri'
|
|
|
|
# Namespace that exposes ActiveSupport's number-formatting helpers as
# module-level methods, so callers can write `Helpers.number_to_human(...)`
# without mixing the helpers into their own classes.
module Helpers
  extend ActiveSupport::NumberHelper
end
|
|
|
|
module Jekyll
  # Liquid tag that renders the "Cited by" count of a Google Scholar article
  # in abbreviated form (units K / M / B).
  #
  # Usage: {% google_scholar_citations scholar_id_var article_id_var %}
  #
  # Both arguments are looked up in the Liquid render context, so they are
  # expected to be variable names (or Liquid literals) rather than raw ids.
  class GoogleScholarCitationsTag < Liquid::Tag
    # Cache shared by every tag instance: article id => rendered count
    # (or "N/A" after a failed fetch), so each article is fetched at most once
    # per process.
    Citations = { }

    # Captures the number in a "Cited by 1,234" phrase (commas included).
    CITED_BY_PATTERN = /Cited by (\d+[,\d]*)/

    # Meta tags inspected for the "Cited by" phrase, in priority order.
    META_SELECTORS = [
      'meta[name="description"]',
      'meta[property="og:description"]'
    ].freeze

    # @param tag_name [String] name the tag was registered under
    # @param params [String] raw tag arguments: "<scholar_id> <article_id>"
    # @param tokens [Object] passed through to Liquid::Tag unchanged
    def initialize(tag_name, params, tokens)
      super
      # Whitespace-separated arguments; any extra arguments are ignored.
      @scholar_id, @article_id = params.split
    end

    # Renders the citation count for the configured article.
    #
    # @param context [Liquid::Context] used to resolve both tag arguments
    # @return [String] abbreviated count, or "N/A" when fetching/parsing failed
    def render(context)
      article_id = context[@article_id.strip]
      scholar_id = context[@scholar_id.strip]

      # If the citation count has already been fetched, return it.
      cached = Citations[article_id]
      return cached if cached

      citation_count =
        begin
          format_count(fetch_citation_count(scholar_id, article_id))
        rescue StandardError => e
          # StandardError (not Exception) so that Ctrl-C / exit requests still
          # abort the build instead of being reported as a failed fetch.
          puts "Error fetching citation count for #{article_id}: #{e.class} - #{e.message}"
          "N/A"
        end

      Citations[article_id] = citation_count
      citation_count.to_s
    end

    private

    # Builds the URL of the Google Scholar page for a single article.
    def citation_url(scholar_id, article_id)
      "https://scholar.google.com/citations?view_op=view_citation&hl=en&user=#{scholar_id}&citation_for_view=#{scholar_id}:#{article_id}"
    end

    # Downloads the article page and returns its citation count as an Integer.
    def fetch_citation_count(scholar_id, article_id)
      # Sleep for a random amount of time to avoid being blocked.
      sleep(rand(1.5..3.5))

      # Block form closes the underlying stream once the page has been parsed.
      doc = URI.open(citation_url(scholar_id, article_id), "User-Agent" => "Ruby/#{RUBY_VERSION}") do |page|
        Nokogiri::HTML(page)
      end

      extract_citation_count(doc)
    end

    # Returns the number from the first meta tag containing "Cited by <n>",
    # or 0 when no inspected tag carries that phrase.
    def extract_citation_count(doc)
      META_SELECTORS.each do |selector|
        meta = doc.css(selector).first
        next unless meta

        match = CITED_BY_PATTERN.match(meta['content'].to_s)
        # `delete` strips every thousands separator ("1,234,567" => 1234567).
        return match[1].delete(',').to_i if match
      end

      0
    end

    # Abbreviates a raw count using the units K, M and B.
    def format_count(count)
      Helpers.number_to_human(
        count,
        format: '%n%u',
        precision: 2,
        units: { thousand: 'K', million: 'M', billion: 'B' }
      )
    end
  end
end
|
|
|
|
# Make the tag available in templates as {% google_scholar_citations ... %}.
Liquid::Template.register_tag('google_scholar_citations', Jekyll::GoogleScholarCitationsTag)
|