Importing Drupal comments to Jekyll

The first thing I needed to do was get the importer to output the post node id so I could match comment to post. While I was mucking about, I changed the categories to tags for a better Jekyll directory structure.

Like so:

# frozen_string_literal: true

require "jekyll-import/importers/drupal_common"

module JekyllImport
  module Importers
    class Drupal7 < Importer
      include DrupalCommon
      extend DrupalCommon::ClassMethods

      def self.build_query(prefix, types, engine)
        types = types.join("' OR n.type = '")
        types = "n.type = '#{types}'"

        tag_group = if engine == "postgresql"
                      <<POSTGRESQL
            (SELECT STRING_AGG(td.name, '|')
            FROM #{prefix}taxonomy_term_data td, #{prefix}taxonomy_index ti
            WHERE ti.tid = td.tid AND ti.nid = n.nid) AS tags
POSTGRESQL
                    else
                      <<SQL
            (SELECT GROUP_CONCAT(td.name SEPARATOR '|')
            FROM #{prefix}taxonomy_term_data td, #{prefix}taxonomy_index ti
            WHERE ti.tid = td.tid AND ti.nid = n.nid) AS 'tags'
SQL
                    end

        query = <<QUERY
                SELECT n.nid,
                       n.title,
                       fdb.body_value,
                       fdb.body_summary,
                       n.created,
                       n.status,
                       n.type,
                       #{tag_group}
                FROM #{prefix}node AS n
                LEFT JOIN #{prefix}field_data_body AS fdb
                  ON fdb.entity_id = n.nid AND fdb.entity_type = 'node'
                WHERE (#{types})
QUERY

        query
      end

      def self.aliases_query(prefix)
        "SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
      end

      def self.post_data(sql_post_data)
        content = sql_post_data[:body_value].to_s
        summary = sql_post_data[:body_summary].to_s
        tags = (sql_post_data[:tags] || "").downcase.strip
        nid = sql_post_data[:nid]

        data = {
          "excerpt"    => summary,
          "tags" => tags.split("|"),
          "nid" => nid
        }

        [data, content]
      end
    end
  end
end

This is the script I'm using to extract both Drupal blog posts (from all my Drupal blogs) and now comments:

#!/bin/sh

cd /jekyll

DBS=`mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -p"$MYSQL_ENV_MYSQL_ROOT_PASSWORD" -B --disable-column-names -e "SHOW DATABASES;" | grep drupal`

# mysql> describe comment;
# +----------+---------------------+------+-----+---------+----------------+
# | Field    | Type                | Null | Key | Default | Extra          |
# +----------+---------------------+------+-----+---------+----------------+
# | cid      | int(11)             | NO   | PRI | NULL    | auto_increment |
# | pid      | int(11)             | NO   | MUL | 0       |                |
# | nid      | int(11)             | NO   | MUL | 0       |                |
# | uid      | int(11)             | NO   | MUL | 0       |                |
# | subject  | varchar(64)         | NO   |     |         |                |
# | hostname | varchar(128)        | NO   |     |         |                |
# | changed  | int(11)             | NO   |     | 0       |                |
# | status   | tinyint(3) unsigned | NO   |     | 1       |                |
# | thread   | varchar(255)        | NO   |     | NULL    |                |
# | name     | varchar(60)         | YES  |     | NULL    |                |
# | mail     | varchar(64)         | YES  |     | NULL    |                |
# | homepage | varchar(255)        | YES  |     | NULL    |                |
# | language | varchar(12)         | NO   |     |         |                |
# | created  | int(11)             | NO   | MUL | 0       |                |
# +----------+---------------------+------+-----+---------+----------------+

# mysql> describe field_data_comment_body;
# +---------------------+------------------+------+-----+---------+-------+
# | Field               | Type             | Null | Key | Default | Extra |
# +---------------------+------------------+------+-----+---------+-------+
# | entity_type         | varchar(128)     | NO   | PRI |         |       |
# | bundle              | varchar(128)     | NO   | MUL |         |       |
# | deleted             | tinyint(4)       | NO   | PRI | 0       |       |
# | entity_id           | int(10) unsigned | NO   | PRI | NULL    |       |
# | revision_id         | int(10) unsigned | YES  | MUL | NULL    |       |
# | language            | varchar(32)      | NO   | PRI |         |       |
# | delta               | int(10) unsigned | NO   | PRI | NULL    |       |
# | comment_body_value  | longtext         | YES  |     | NULL    |       |
# | comment_body_format | varchar(255)     | YES  | MUL | NULL    |       |
# +---------------------+------------------+------+-----+---------+-------+

for DB in $DBS
do
    mkdir -p $DB
    cd $DB
    
    ruby -r rubygems -e "require \"jekyll-import\";
    JekyllImport::Importers::Drupal7.run({
      \"dbname\"   => \"$DB\",
      \"user\"     => \"root\",
      \"password\" => \"$MYSQL_ENV_MYSQL_ROOT_PASSWORD\",
      \"host\"     => \"$MYSQL_PORT_3306_TCP_ADDR\",
      \"types\"    => [\"blog\", \"story\", \"article\",\"page\",\"book\",\"poll\"]
    })"
    
    mysql --default-character-set=utf8 -h"$MYSQL_PORT_3306_TCP_ADDR" -p"$MYSQL_ENV_MYSQL_ROOT_PASSWORD" -B --disable-column-names -e "select c.nid, c.cid, c.subject, c.hostname, c.name, c.mail, c.homepage, c.created, fdcb.comment_body_value from comment AS c LEFT JOIN field_data_comment_body AS fdcb ON c.cid = fdcb.entity_id WHERE c.status = 1;" $DB > comments.dump
    
    cd ..
    
done

This gives me a tab delimited file with all my comment data. At this point, I haven't decided if I'm going to turn it into a Jekyll approved data file, individual files for a collection, or merge them into the relevant posts. I do know that I don't want to add any kind of JavaScript, PHP or database driven commenting system.