Importing Drupal comments to Jekyll
The first thing I needed to do was get the
importer to output
the post node id
so I could match
comment to post. While I was mucking about, I changed
the categories to tags for a
better Jekyll directory structure.
Like so:
# frozen_string_literal: true
require "jekyll-import/importers/drupal_common"
module JekyllImport
module Importers
class Drupal7 < Importer
include DrupalCommon
extend DrupalCommon::ClassMethods
def self.build_query(prefix, types, engine)
types = types.join("' OR n.type = '")
types = "n.type = '#{types}'"
tag_group = if engine == "postgresql"
<<POSTGRESQL
(SELECT STRING_AGG(td.name, '|')
FROM #{prefix}taxonomy_term_data td, #{prefix}taxonomy_index ti
WHERE ti.tid = td.tid AND ti.nid = n.nid) AS tags
POSTGRESQL
else
<<SQL
(SELECT GROUP_CONCAT(td.name SEPARATOR '|')
FROM #{prefix}taxonomy_term_data td, #{prefix}taxonomy_index ti
WHERE ti.tid = td.tid AND ti.nid = n.nid) AS 'tags'
SQL
end
query = <<QUERY
SELECT n.nid,
n.title,
fdb.body_value,
fdb.body_summary,
n.created,
n.status,
n.type,
#{tag_group}
FROM #{prefix}node AS n
LEFT JOIN #{prefix}field_data_body AS fdb
ON fdb.entity_id = n.nid AND fdb.entity_type = 'node'
WHERE (#{types})
QUERY
query
end
def self.aliases_query(prefix)
"SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
end
def self.post_data(sql_post_data)
content = sql_post_data[:body_value].to_s
summary = sql_post_data[:body_summary].to_s
tags = (sql_post_data[:tags] || "").downcase.strip
nid = sql_post_data[:nid]
data = {
"excerpt" => summary,
"tags" => tags.split("|"),
"nid" => nid
}
[data, content]
end
end
end
end
This is the script I'm using to extract both Drupal blog posts
(from all my Drupal blogs) and now comments:
#!/bin/sh
cd /jekyll
DBS=`mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -p"$MYSQL_ENV_MYSQL_ROOT_PASSWORD" -B --disable-column-names -e "SHOW DATABASES;" | grep drupal`
# mysql> describe comment;
# +----------+---------------------+------+-----+---------+----------------+
# | Field | Type | Null | Key | Default | Extra |
# +----------+---------------------+------+-----+---------+----------------+
# | cid | int(11) | NO | PRI | NULL | auto_increment |
# | pid | int(11) | NO | MUL | 0 | |
# | nid | int(11) | NO | MUL | 0 | |
# | uid | int(11) | NO | MUL | 0 | |
# | subject | varchar(64) | NO | | | |
# | hostname | varchar(128) | NO | | | |
# | changed | int(11) | NO | | 0 | |
# | status | tinyint(3) unsigned | NO | | 1 | |
# | thread | varchar(255) | NO | | NULL | |
# | name | varchar(60) | YES | | NULL | |
# | mail | varchar(64) | YES | | NULL | |
# | homepage | varchar(255) | YES | | NULL | |
# | language | varchar(12) | NO | | | |
# | created | int(11) | NO | MUL | 0 | |
# +----------+---------------------+------+-----+---------+----------------+
# mysql> describe field_data_comment_body;
# +---------------------+------------------+------+-----+---------+-------+
# | Field | Type | Null | Key | Default | Extra |
# +---------------------+------------------+------+-----+---------+-------+
# | entity_type | varchar(128) | NO | PRI | | |
# | bundle | varchar(128) | NO | MUL | | |
# | deleted | tinyint(4) | NO | PRI | 0 | |
# | entity_id | int(10) unsigned | NO | PRI | NULL | |
# | revision_id | int(10) unsigned | YES | MUL | NULL | |
# | language | varchar(32) | NO | PRI | | |
# | delta | int(10) unsigned | NO | PRI | NULL | |
# | comment_body_value | longtext | YES | | NULL | |
# | comment_body_format | varchar(255) | YES | MUL | NULL | |
# +---------------------+------------------+------+-----+---------+-------+
for DB in $DBS
do
mkdir -p $DB
cd $DB
ruby -r rubygems -e "require \"jekyll-import\";
JekyllImport::Importers::Drupal7.run({
\"dbname\" => \"$DB\",
\"user\" => \"root\",
\"password\" => \"$MYSQL_ENV_MYSQL_ROOT_PASSWORD\",
\"host\" => \"$MYSQL_PORT_3306_TCP_ADDR\",
\"types\" => [\"blog\", \"story\", \"article\",\"page\",\"book\",\"poll\"]
})"
mysql --default-character-set=utf8 -h"$MYSQL_PORT_3306_TCP_ADDR" -p"$MYSQL_ENV_MYSQL_ROOT_PASSWORD" -B --disable-column-names -e "select c.nid, c.cid, c.subject, c.hostname, c.name, c.mail, c.homepage, c.created, fdcb.comment_body_value from comment AS c LEFT JOIN field_data_comment_body AS fdcb ON c.cid = fdcb.entity_id WHERE c.status = 1;" $DB > comments.dump
cd ..
done
This gives me a tab delimited file with all my comment data. At this point, I haven't decided if I'm going to turn it into a Jekyll approved data file, individual files for a collection, or merge them into the relevant posts. I do know that I don't want to add any kind of JavaScript, PHP or database driven commenting system.