Enable specifying explicit list of external posts to display (#2059)
- updates the `external-posts.rb` plugin, allowing the user to specify an explicit list of URLs in `_config.yml` that are then displayed in the blog feed as external posts - 99% of the code in this change is written by gpt-4: https://chat.openai.com/share/24432d24-36a7-4d6f-a5c0-d7e5142f68cd
This commit is contained in:
parent
1274581702
commit
d004837e60
|
|
@ -133,7 +133,7 @@ bing_site_verification: # out your bing-site-verification ID (Bing Webmaster)
|
||||||
blog_name: al-folio # blog_name will be displayed in your blog page
|
blog_name: al-folio # blog_name will be displayed in your blog page
|
||||||
blog_description: a simple whitespace theme for academics
|
blog_description: a simple whitespace theme for academics
|
||||||
permalink: /blog/:year/:title/
|
permalink: /blog/:year/:title/
|
||||||
lsi: true # produce an index for related posts
|
lsi: false # produce an index for related posts
|
||||||
|
|
||||||
# Pagination
|
# Pagination
|
||||||
pagination:
|
pagination:
|
||||||
|
|
@ -168,6 +168,10 @@ disqus_shortname: al-folio # put your disqus shortname
|
||||||
external_sources:
|
external_sources:
|
||||||
- name: medium.com
|
- name: medium.com
|
||||||
rss_url: https://medium.com/@al-folio/feed
|
rss_url: https://medium.com/@al-folio/feed
|
||||||
|
- name: Google Blog
|
||||||
|
posts:
|
||||||
|
- url: https://blog.google/technology/ai/google-gemini-update-flash-ai-assistant-io-2024/
|
||||||
|
published_date: 2024-05-14
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# Collections
|
# Collections
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
require 'feedjira'
|
require 'feedjira'
|
||||||
require 'httparty'
|
require 'httparty'
|
||||||
require 'jekyll'
|
require 'jekyll'
|
||||||
|
require 'nokogiri'
|
||||||
|
require 'time'
|
||||||
|
|
||||||
module ExternalPosts
|
module ExternalPosts
|
||||||
class ExternalPostsGenerator < Jekyll::Generator
|
class ExternalPostsGenerator < Jekyll::Generator
|
||||||
|
|
@ -10,27 +12,85 @@ module ExternalPosts
|
||||||
def generate(site)
|
def generate(site)
|
||||||
if site.config['external_sources'] != nil
|
if site.config['external_sources'] != nil
|
||||||
site.config['external_sources'].each do |src|
|
site.config['external_sources'].each do |src|
|
||||||
p "Fetching external posts from #{src['name']}:"
|
puts "Fetching external posts from #{src['name']}:"
|
||||||
xml = HTTParty.get(src['rss_url']).body
|
if src['rss_url']
|
||||||
return if xml.nil?
|
fetch_from_rss(site, src)
|
||||||
feed = Feedjira.parse(xml)
|
elsif src['posts']
|
||||||
feed.entries.each do |e|
|
fetch_from_urls(site, src)
|
||||||
p "...fetching #{e.url}"
|
|
||||||
slug = e.title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
|
|
||||||
path = site.in_source_dir("_posts/#{slug}.md")
|
|
||||||
doc = Jekyll::Document.new(
|
|
||||||
path, { :site => site, :collection => site.collections['posts'] }
|
|
||||||
)
|
|
||||||
doc.data['external_source'] = src['name'];
|
|
||||||
doc.data['feed_content'] = e.content;
|
|
||||||
doc.data['title'] = "#{e.title}";
|
|
||||||
doc.data['description'] = e.summary;
|
|
||||||
doc.data['date'] = e.published;
|
|
||||||
doc.data['redirect'] = e.url;
|
|
||||||
site.collections['posts'].docs << doc
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Downloads the feed configured for one external source and registers each
# of its entries as an external post.
#
# @param site the site object being generated; handed on to process_entries
# @param src [Hash] one `external_sources` item; reads 'rss_url' and 'name'
# @return [void] returns early (after a warning) when no usable feed body
#   could be downloaded
def fetch_from_rss(site, src)
  response = HTTParty.get(src['rss_url'])
  xml = response.body

  # An HTTP error page or an empty body is not a feed. Passing it on to
  # Feedjira.parse would raise and abort the whole build, so skip this source.
  unless (200..299).cover?(response.code) && !xml.to_s.strip.empty?
    warn "...skipping #{src['name']}: could not fetch #{src['rss_url']} (HTTP #{response.code})"
    return
  end

  feed = Feedjira.parse(xml)
  process_entries(site, src, feed.entries)
end
|
||||||
|
|
||||||
|
# Registers every entry of an already-parsed feed as an external post.
#
# @param site the site object being generated; handed on to create_document
# @param src [Hash] one `external_sources` item; reads 'name'
# @param entries collection of feed entries responding to url, title,
#   content, summary and published
# @return the value of entries.each
def process_entries(site, src, entries)
  entries.each do |entry|
    link = entry.url
    puts "...fetching #{link}"

    fields = {
      title: entry.title,
      content: entry.content,
      summary: entry.summary,
      published: entry.published
    }
    create_document(site, src['name'], link, fields)
  end
end
|
||||||
|
|
||||||
|
def create_document(site, source_name, url, content)
|
||||||
|
slug = content[:title].downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
|
||||||
|
path = site.in_source_dir("_posts/#{slug}.md")
|
||||||
|
doc = Jekyll::Document.new(
|
||||||
|
path, { :site => site, :collection => site.collections['posts'] }
|
||||||
|
)
|
||||||
|
doc.data['external_source'] = source_name
|
||||||
|
doc.data['title'] = content[:title]
|
||||||
|
doc.data['feed_content'] = content[:content]
|
||||||
|
doc.data['description'] = content[:summary]
|
||||||
|
doc.data['date'] = content[:published]
|
||||||
|
doc.data['redirect'] = url
|
||||||
|
site.collections['posts'].docs << doc
|
||||||
|
end
|
||||||
|
|
||||||
|
# Creates one external post per explicitly listed URL of a source.
#
# @param site the site object being generated; handed on to create_document
# @param src [Hash] one `external_sources` item; reads 'name' and 'posts',
#   where each post is a Hash with 'url' and 'published_date'
# @return the value of src['posts'].each
def fetch_from_urls(site, src)
  src['posts'].each do |entry|
    link = entry['url']
    puts "...fetching #{link}"

    page = fetch_content_from_url(link)
    # The page itself is not asked for a date; the configured one is used.
    page[:published] = parse_published_date(entry['published_date'])
    create_document(site, src['name'], link, page)
  end
end
|
||||||
|
|
||||||
|
# Normalises a configured `published_date` value to a UTC Time.
#
# @param published_date [String, Date, DateTime, Time] value as loaded from
#   the configuration
# @return [Time] the corresponding instant in UTC
# @raise [RuntimeError] when the value is of any other type (including nil)
# @raise [ArgumentError] from Time.parse when a String cannot be parsed
def parse_published_date(published_date)
  case published_date
  when String
    # Time.parse assumes the local zone when the string carries none.
    Time.parse(published_date).utc
  when DateTime
    # DateTime is a Date subclass, so it must be matched first to keep its
    # time of day and offset.
    published_date.to_time.utc
  when Date
    # Date#to_time is local midnight; converting that to UTC moves the post to
    # the previous day in zones ahead of UTC. Keep the calendar date instead.
    Time.utc(published_date.year, published_date.month, published_date.day)
  when Time
    # getutc returns a new Time and leaves the caller's object untouched.
    published_date.getutc
  else
    # inspect makes nil and other odd values visible in the message.
    raise "Invalid date format for #{published_date.inspect}"
  end
end
|
||||||
|
|
||||||
|
# Downloads a web page and extracts the pieces needed for an external post.
#
# @param url [String] address of the page to fetch
# @return [Hash] with :title, :content (inner HTML of <body>) and :summary;
#   every value is a String and is '' when the page does not provide it
def fetch_content_from_url(url)
  response = HTTParty.get(url)
  # The post is still built from whatever came back, but a failed request
  # should be visible in the build output.
  warn "...unexpected HTTP status #{response.code} for #{url}" unless (200..299).cover?(response.code)

  # to_s guards against a nil body.
  parsed_html = Nokogiri::HTML(response.body.to_s)

  # Prefer the standard tags; fall back to the Open Graph equivalents when a
  # page only provides those. Surrounding whitespace is stripped.
  title = [
    parsed_html.at('head title')&.text,
    parsed_html.at('head meta[property="og:title"]')&.attr('content')
  ].map { |text| text.to_s.strip }.find { |text| !text.empty? } || ''

  description = [
    parsed_html.at('head meta[name="description"]')&.attr('content'),
    parsed_html.at('head meta[property="og:description"]')&.attr('content')
  ].map { |text| text.to_s.strip }.find { |text| !text.empty? } || ''

  body_content = parsed_html.at('body')&.inner_html || ''

  {
    title: title,
    content: body_content,
    summary: description
    # No :published key here: the date is not read from the page.
  }
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue