MariaDB ColumnStore is ideal for real-time analytics and complex queries on large datasets across industries.
install.packages("odbc")install.packages("RMariaDB")library(RMariaDB)
con <- dbConnect(
drv = RMariaDB::MariaDB(),
username = NULL,
password = NULL,
host = NULL,
port = 3306
)install.packages("h2o")connection_url <- "jdbc:mariadb://172.16.2.178:3306/ingestSQL?&useSSL=false"
username <- "root"
password <- "abc123"
# Whole Table:
table <- "citibike20k"
my_citibike_data <- h2o.import_sql_table(connection_url, table, username, password)
# SELECT Query:
select_query <- "SELECT bikeid FROM citibike20k"
my_citibike_data <- h2o.import_sql_select(connection_url, select_query, username, password)java -cp <path_to_h2o_jar>:<path_to_jdbc_driver_jar> water.H2OApp-- Enable Query Accelerator and save previous settings
SET @old_settings = queryacc.enable_queryacc();
-- Run your queries
SELECT c_zip, SUM(c_payment_cnt) FROM test.customer_indexed GROUP BY c_zip;
-- Disable and restore previous settings
CALL queryacc.disable_queryacc(@old_settings);CALL queryacc.with_queryacc('SELECT c_zip, SUM(c_payment_cnt) FROM test.customer_indexed GROUP BY c_zip');columnstore_query_accel_parallel_factor
controls the number of parallel ranges to be used for Query Accelerator.SET @old_settings = queryacc.enable_queryacc();
SELECT c_zip, sum(c_payment_cnt) FROM test.customer_indexed GROUP BY c_zip ORDER BY c_zip ; -- 0.7s
CALL queryacc.disable_queryacc(@old_settings);sed -i 's/^\s*loose-columnstore_innodb_queries_use_mcs\s*=.*/loose-columnstore_innodb_queries_use_mcs = off/' /etc/my.cnf.d/columnstore.cnf
systemctl restart mariadbSELECT mcs_get_plan('rules');
+-----------------------+
| mcs_get_plan('rules') |
+-----------------------+
| parallel_ces |
+-----------------------+
SELECT mcs_get_plan('optimized');
+-----------------------+
| mcs_get_plan('rules') |
+-----------------------+
...
>>From Tables
derived table - $added_sub_test_customer_indexed_0SELECT column_a, SUM(column_b) FROM innodb_table GROUP BY column_a SELECT column_a FROM tbl WHERE column_a = column_b[mariadb]
loose-columnstore_innodb_queries_use_mcs = onANALYZE TABLE table_name PERSISTENT FOR COLUMNS (column_name) indexes();CREATE DATABASE IF NOT EXISTS test; USE test;
CREATE TABLE IF NOT EXISTS test.customer_indexed ( `c_d_id` int(2) NOT NULL, `c_w_id` int(6) NOT NULL, `c_first` varchar(16) , `c_middle` char(2) , `c_last` varchar(16) , `c_street_1` varchar(20) , `c_street_2` varchar(20) , `c_city` varchar(20) , `c_state` char(2) , `c_zip` int(5) , `c_phone` char(16) , `c_since` datetime DEFAULT NULL, `c_credit` char(2) , `c_credit_lim` decimal(12,2) DEFAULT NULL, `c_discount` decimal(4,4) DEFAULT NULL, `c_balance` decimal(12,2) DEFAULT NULL, `c_ytd_payment` decimal(12,2) DEFAULT NULL, `c_payment_cnt` int(8) DEFAULT NULL, `c_delivery_cnt` int(8) DEFAULT NULL, `c_data` varchar(500)) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO test.customer_indexed SELECT ROUND(RAND() * 42000, 0), ROUND(RAND() * 42000, 0), substring(MD5(RAND()*1000000000),1,16), substring(MD5(RAND()),1,2), substring(MD5(RAND()*1000000000),1,16), substring(MD5(RAND()*1000000000),1,20), substring(MD5(RAND()*1000000000),1,20), substring(MD5(RAND()*1000000000),1,20), substring(MD5(RAND()),1,2), ROUND(RAND() * 42000, 0), substring(MD5(RAND()),1,16), CURRENT_TIMESTAMP - INTERVAL FLOOR(RAND() * 365 * 24 * 60 *60) SECOND, substring(MD5(RAND()),1,2), ROUND(RAND() * 9999999999, 2), ROUND(RAND() * 0, 4), ROUND(RAND() * 9999999999, 2), ROUND(RAND() * 9999999999, 2), ROUND(RAND() * 42000, 0), ROUND(RAND() * 42000, 0), substring(MD5(RAND()*1000000000),1,500) FROM seq_1_to_8000000; -- 3.5 min
ALTER TABLE test.customer_indexed ADD INDEX idx_fast (`c_zip`, `c_payment_cnt`); -- ~1.5 min
-- baseline
SELECT c_zip, sum(c_payment_cnt) FROM test.customer_indexed GROUP BY c_zip ORDER BY c_zip ; --2.6ssed -i 's/^\s*loose-columnstore_innodb_queries_use_mcs\s*=.*/loose-columnstore_innodb_queries_use_mcs = on/' /etc/my.cnf.d/columnstore.cnf
systemctl restart mariadb# In mariadb (MariaDB command-line client)
USE test;
ANALYZE table test.customer_indexed PERSISTENT FOR COLUMNS (c_zip,c_payment_cnt) indexes(); --8s
SELECT table_name, column_name, hist_type FROM mysql.column_stats WHERE table_name="customer_indexed";
SHOW VARIABLES LIKE "%columnstore_innodb_queries_use_mcs%";tail -f /var/log/mariadb/columnstore/debug.logSET columnstore_ces_optimization_parallel_factor=100;EXPLAIN FORMAT=JSON SELECT c_zip, SUM(c_payment_cnt) FROM test.customer_indexed GROUP BY c_zip ORDER BY c_zip ;
...
| {
"query_block": {
"select_id": 1,
"table": {
"message": "Pushed select"
}
}
} |
...