Skip to content
Snippets Groups Projects
update_hivemind_db.sql 12.31 KiB
-- This script will upgrade hivemind database to new version
-- Authors: Dariusz Kędzierski
-- Created: 26-04-2020
-- Last edit: 26-05-2020

CREATE TABLE IF NOT EXISTS hive_db_version (
  version VARCHAR(50) PRIMARY KEY,
  notes VARCHAR(1024)
);

DO $$
  BEGIN
    RAISE NOTICE 'Upgrading database to version 1.0';
    IF EXISTS (SELECT version FROM hive_db_version WHERE version = '1.0')
    THEN
      RAISE EXCEPTION 'Database already in version 1.0';
    END IF;
  END
$$ LANGUAGE plpgsql;

-- Upgrade to version 1.0
-- in this version we will move data from raw_json into separate columns
-- also will split hive_posts_cache to parts and then move all data to proper tables
-- also it will add needed indexes and procedures

-- Update version info
INSERT INTO hive_db_version (version, notes) VALUES ('1.0', 'https://gitlab.syncad.com/blocktrades/hivemind/issues/5');

-- add special author value, empty author to accounts table
-- RAISE NOTICE 'add special author value, empty author to accounts table';
INSERT INTO hive_accounts (name, created_at) VALUES ('', '1990-01-01T00:00:00');

-- Table to hold permlink dictionary, permlink is unique
-- RAISE NOTICE 'Table to hold permlink dictionary, permlink is unique';
CREATE TABLE IF NOT EXISTS hive_permlink_data (
    id SERIAL PRIMARY KEY NOT NULL,
    permlink VARCHAR(255) NOT NULL CONSTRAINT hive_permlink_data_permlink UNIQUE
);
-- Populate hive_permlink_data
-- insert special permlink, empty permlink
-- RAISE NOTICE 'insert special permlink, empty permlink';
INSERT INTO hive_permlink_data (permlink) VALUES ('');
-- run on permlink field of hive_posts_cache
-- RAISE NOTICE 'run on permlink field of hive_posts_cache';
INSERT INTO hive_permlink_data (permlink) SELECT permlink FROM hive_posts ON CONFLICT (permlink) DO NOTHING;
-- we should also scan parent_permlink and root_permlink but we will do that on raw_json scan
-- Create indexes
CREATE INDEX IF NOT EXISTS hive_permlink_data_permlink_idx ON hive_permlink_data (permlink ASC);
CREATE INDEX IF NOT EXISTS hive_permlink_data_permlink_c_idx ON hive_permlink_data (permlink COLLATE "C" ASC);

-- Table to hold category data, category is unique
-- RAISE NOTICE 'Table to hold category data, category is unique';
CREATE TABLE IF NOT EXISTS hive_category_data (
    id SERIAL PRIMARY KEY NOT NULL,
    category VARCHAR(255) NOT NULL CONSTRAINT hive_category_data_category UNIQUE
);
-- Populate hive_category_data
-- insert special category, empty category
-- RAISE NOTICE 'insert special category, empty category';
INSERT INTO hive_category_data (category) VALUES ('');
-- run on category field of hive_posts_cache
-- RAISE NOTICE 'run on category field of hive_posts_cache';
INSERT INTO hive_category_data (category) SELECT category FROM hive_posts ON CONFLICT (category) DO NOTHING;
-- Create indexes
CREATE INDEX IF NOT EXISTS hive_category_data_category_idx ON hive_category_data (category ASC);
CREATE INDEX IF NOT EXISTS hive_category_data_category_c_idx ON hive_category_data (category COLLATE "C" ASC);

-- Table to hold post data
-- RAISE NOTICE 'Table to hold post data';
CREATE TABLE IF NOT EXISTS hive_posts_new (
  id INT DEFAULT '-1',
  parent_id INT DEFAULT '-1',
  author_id INT DEFAULT '-1',
  permlink_id INT DEFAULT '-1',
  category_id INT DEFAULT '1',
  community_id INT,
  created_at DATE DEFAULT '1990-01-01T00:00:00',
  depth SMALLINT DEFAULT '-1',
  is_deleted BOOLEAN DEFAULT '0',
  is_pinned BOOLEAN DEFAULT '0',
  is_muted BOOLEAN DEFAULT '0',
  is_valid BOOLEAN DEFAULT '1',
  promoted NUMERIC(10, 3) DEFAULT '0.0',
  
  -- important/index
  children SMALLINT DEFAULT '-1',

  -- basic/extended-stats
  author_rep NUMERIC(6) DEFAULT '0.0',
  flag_weight NUMERIC(6) DEFAULT '0.0',
  total_votes INT DEFAULT '-1',
  up_votes INT DEFAULT '-1',
  
  -- core stats/indexes
  payout NUMERIC(10, 3) DEFAULT '0.0',
  payout_at DATE DEFAULT '1990-01-01T00:00:00',
  updated_at DATE DEFAULT '1990-01-01T00:00:00',
  is_paidout BOOLEAN DEFAULT '0',

  -- ui flags/filters
  is_nsfw BOOLEAN DEFAULT '0',
  is_declined BOOLEAN DEFAULT '0',
  is_full_power BOOLEAN DEFAULT '0',
  is_hidden BOOLEAN DEFAULT '0',
  is_grayed BOOLEAN DEFAULT '0',

  -- important indexes
  rshares BIGINT DEFAULT '-1',
  sc_trend NUMERIC(6) DEFAULT '0.0',
  sc_hot NUMERIC(6) DEFAULT '0.0',

  -- columns from raw_json
  parent_author_id INT DEFAULT '-1',
  parent_permlink_id INT DEFAULT '-1',
  curator_payout_value VARCHAR(16) DEFAULT '',
  root_author_id INT DEFAULT '-1',
  root_permlink_id INT DEFAULT '-1',
  max_accepted_payout VARCHAR(16) DEFAULT '',
  percent_steem_dollars INT DEFAULT '-1',
  allow_replies BOOLEAN DEFAULT '1',
  allow_votes BOOLEAN DEFAULT '1',
  allow_curation_rewards BOOLEAN DEFAULT '1',
  beneficiaries JSON DEFAULT '[]',
  url TEXT DEFAULT '',
  root_title VARCHAR(255) DEFAULT ''
);

-- Table to hold bulk post data
-- RAISE NOTICE 'Table to hold bulk post data';
CREATE TABLE IF NOT EXISTS hive_post_data (
  id INT NOT NULL,
  title VARCHAR(255) NOT NULL,
  preview VARCHAR(1024) NOT NULL,
  img_url VARCHAR(1024) NOT NULL,
  body TEXT,
  votes TEXT,
  json JSON
);
CREATE INDEX IF NOT EXISTS hive_post_data_id_idx ON hive_post_data (id);
-- Copy data from hive_posts table to new table
-- RAISE NOTICE 'Copy data from hive_posts table to new table';
INSERT INTO hive_posts_new (
  id,
  parent_id,
  author_id,
  permlink_id,
  category_id,
  community_id,
  created_at,
  depth,
  is_deleted,
  is_pinned,
  is_muted,
  is_valid,
  promoted
)
SELECT
  hp.id,
  hp.parent_id,
  (SELECT id FROM hive_accounts WHERE name = hp.author) as author_id,
  (SELECT id FROM hive_permlink_data WHERE permlink = hp.permlink) as permlink_id,
  (SELECT id FROM hive_category_data WHERE category = hp.category) as category_id,
  hp.community_id,
  hp.created_at,
  hp.depth,
  hp.is_deleted,
  hp.is_pinned,
  hp.is_muted,
  hp.is_valid,
  hp.promoted
FROM
  hive_posts hp;

-- Copy standard data to new posts table
-- RAISE NOTICE 'Copy standard data to new posts table';
UPDATE hive_posts_new hpn SET (                             
  children, author_rep, flag_weight, total_votes, up_votes, payout,
  payout_at, updated_at, is_paidout, is_nsfw, is_declined, is_full_power,
  is_hidden, is_grayed, rshares, sc_trend, sc_hot)
=
  (SELECT
    children, author_rep, flag_weight, total_votes, up_votes, payout,
    payout_at, updated_at, is_paidout, is_nsfw, is_declined, is_full_power,
    is_hidden, is_grayed, rshares, sc_trend, sc_hot FROM hive_posts_cache hpc WHERE hpn.id = hpc.post_id);

-- Populate table hive_post_data with bulk data from hive_posts_cache
-- RAISE NOTICE 'Populate table hive_post_data with bulk data from hive_posts_cache';
INSERT INTO hive_post_data (id, title, preview, img_url, body, votes, json) SELECT post_id, title, preview, img_url, body, votes, json::json FROM hive_posts_cache;

-- Helper type for use with json_populate_record
-- RAISE NOTICE 'Creating legacy_comment_data table';
CREATE TABLE legacy_comment_data (
  id BIGINT,
  raw_json TEXT,
  parent_author VARCHAR(16),
  parent_permlink VARCHAR(255),
  curator_payout_value VARCHAR(16),
  root_author VARCHAR(16),
  root_permlink VARCHAR(255),
  max_accepted_payout VARCHAR(16),
  percent_steem_dollars INT,
  allow_replies BOOLEAN,
  allow_votes BOOLEAN,
  allow_curation_rewards BOOLEAN,
  beneficiaries JSON,
  url TEXT,
  root_title VARCHAR(255)
);
-- RAISE NOTICE 'Creating legacy_comment_type table';
CREATE TYPE legacy_comment_type AS (
  id BIGINT,
  parent_author VARCHAR(16),
  parent_permlink VARCHAR(255),
  curator_payout_value VARCHAR(16),
  root_author VARCHAR(16),
  root_permlink VARCHAR(255),
  max_accepted_payout VARCHAR(16),
  percent_steem_dollars INT,
  allow_replies BOOLEAN,
  allow_votes BOOLEAN,
  allow_curation_rewards BOOLEAN,
  beneficiaries JSON,
  url TEXT,
  root_title VARCHAR(255)
);

-- RAISE NOTICE 'Copying raw_json data to temporaty table';
INSERT INTO legacy_comment_data (id, raw_json) SELECT post_id, raw_json FROM hive_posts_cache;

update legacy_comment_data lcd set (parent_author, parent_permlink, 
  curator_payout_value, root_author, root_permlink, max_accepted_payout,
  percent_steem_dollars, allow_replies, allow_votes, allow_curation_rewards,
  beneficiaries, url, root_title)
=
(SELECT parent_author, parent_permlink, 
  curator_payout_value, root_author, root_permlink, max_accepted_payout,
  percent_steem_dollars, allow_replies, allow_votes, allow_curation_rewards,
  beneficiaries, url, root_title from json_populate_record(null::legacy_comment_type, lcd.raw_json::json)
);

-- RAISE NOTICE 'Copying parent_permlink data to proper colums';
INSERT INTO hive_permlink_data (permlink) SELECT parent_permlink FROM legacy_comment_data ON CONFLICT (permlink) DO NOTHING;

-- RAISE NOTICE 'Copying root_permlink data to proper colums';
INSERT INTO hive_permlink_data (permlink) SELECT root_permlink FROM legacy_comment_data ON CONFLICT (permlink) DO NOTHING;

-- RAISE NOTICE 'Moving raw json data data to proper colums in hive_posts';
UPDATE hive_posts_new hpn SET
  parent_author_id = (SELECT id FROM hive_accounts WHERE name = lcd.parent_author),
  parent_permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = lcd.parent_permlink),
  curator_payout_value = lcd.curator_payout_value,
  root_author_id = (SELECT id FROM hive_accounts WHERE name = lcd.root_author),
  root_permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = lcd.root_permlink),
  max_accepted_payout = lcd.max_accepted_payout,
  percent_steem_dollars = lcd.percent_steem_dollars,
  allow_replies = lcd.allow_replies,
  allow_votes = lcd.allow_votes,
  allow_curation_rewards = lcd.allow_curation_rewards,
  beneficiaries = lcd.beneficiaries,
  url = lcd.url,
  root_title = lcd.root_title
FROM (SELECT id, parent_author, parent_permlink, curator_payout_value, root_author, root_permlink,
  max_accepted_payout, percent_steem_dollars, allow_replies, allow_votes, allow_curation_rewards,
  beneficiaries, url, root_title FROM legacy_comment_data) AS lcd
WHERE lcd.id = hpn.id;

-- Drop and rename tables after data migration
-- RAISE NOTICE 'Droping tables';
DROP TYPE IF EXISTS legacy_comment_type;
DROP TABLE IF EXISTS legacy_comment_data;
DROP TABLE IF EXISTS hive_posts_cache;
-- before deleting hive_posts we need to remove constraints
ALTER TABLE hive_payments DROP CONSTRAINT hive_payments_fk3;
ALTER TABLE hive_reblogs DROP CONSTRAINT hive_reblogs_fk2;
DROP TABLE IF EXISTS hive_posts;
-- now rename table 
-- RAISE NOTICE 'Renaming hive_posts_new to hive_posts';
ALTER TABLE hive_posts_new RENAME TO hive_posts;
-- in order to make id column a primary key we will need a sequence
CREATE SEQUENCE hive_posts_serial OWNED BY hive_posts.id;
-- and init that sequence from largest id + 1
SELECT setval('hive_posts_serial', (SELECT max(id)+1 FROM hive_posts), false);
-- now set that sequence as id sequence for hive_posts
ALTER TABLE hive_posts ALTER COLUMN id set default nextval('hive_posts_serial');
-- finally add primary key
ALTER TABLE hive_posts ADD PRIMARY KEY (id);
-- put constraints back
ALTER TABLE hive_payments ADD CONSTRAINT hive_payments_fk3 FOREIGN KEY (post_id) REFERENCES hive_posts(id);
ALTER TABLE hive_reblogs ADD CONSTRAINT hive_reblogs_fk2 FOREIGN KEY (post_id) REFERENCES hive_posts(id);

ALTER TABLE hive_posts ADD CONSTRAINT hive_posts_fk1 FOREIGN KEY (author_id) REFERENCES hive_accounts(id);
ALTER TABLE hive_posts ADD CONSTRAINT hive_posts_fk3 FOREIGN KEY (parent_id) REFERENCES hive_posts(id);
ALTER TABLE hive_posts ADD CONSTRAINT hive_posts_ux1 UNIQUE (author_id, permlink_id);

-- Make indexes in hive_posts
-- RAISE NOTICE 'Creating indexes';
CREATE INDEX IF NOT EXISTS hive_posts_author_id_idx ON hive_posts (author_id);
CREATE INDEX IF NOT EXISTS hive_posts_permlink_id_idx ON hive_posts (permlink_id);

CREATE INDEX IF NOT EXISTS hive_posts_depth_idx ON hive_posts (depth);
CREATE INDEX IF NOT EXISTS hive_posts_parent_id_idx ON hive_posts (parent_id);
CREATE INDEX IF NOT EXISTS hive_posts_community_id_idx ON hive_posts (community_id);

CREATE INDEX IF NOT EXISTS hive_posts_category_id_idx ON hive_posts (category_id);
CREATE INDEX IF NOT EXISTS hive_posts_payout_at_idx ON hive_posts (payout_at);

CREATE INDEX IF NOT EXISTS hive_posts_payout_idx ON hive_posts (payout);

CREATE INDEX IF NOT EXISTS hive_posts_promoted_idx ON hive_posts (promoted);

CREATE INDEX IF NOT EXISTS hive_posts_sc_trend_idx ON hive_posts (sc_trend);
CREATE INDEX IF NOT EXISTS hive_posts_sc_hot_idx ON hive_posts (sc_hot);

CREATE INDEX IF NOT EXISTS hive_posts_created_at_idx ON hive_posts (created_at);