Commit 2c6005e7 authored by unknown's avatar unknown

W#2776 Utility for calculating storage requirements for NDB

Initial ndb_size.pl

Connects to an existing MySQL database and produces a XHTML document containing
an estimation of storage requirements.


ndb/tools/ndb_size.pl:
  Initial checkin of ndb_size.pl
ndb/tools/ndb_size.tmpl:
  Initial checkin of XHTML template file for ndb_size.pl
parent 980b53b5
#!/usr/bin/perl -w
use strict;
use DBI;
use POSIX;
use HTML::Template;
# MySQL Cluster size estimator
# ----------------------------
#
# (C)2005 MySQL AB
#
#
# The purpose of this tool is to work out storage requirements
# from an existing MySQL database.
#
# This involves connecting to a mysql server and throwing a bunch
# of queries at it.
#
# We currently estimate sizes for: 4.1, 5.0 and 5.1 to various amounts
# of accurracy.
#
# There is no warranty.
#
# BUGS
# ----
# - enum/set is 0 byte storage! Woah - efficient!
# - some float stores come out weird (when there's a comma e.g. 'float(4,1)')
# - no disk data values
# - computes the storage requirements of views (and probably MERGE)
# - ignores character sets.
my $template = HTML::Template->new(filename => 'ndb_size.tmpl',
die_on_bad_params => 0);
my $dbh;
{
my $database= $ARGV[0];
my $hostname= $ARGV[1];
my $port= $ARGV[2];
my $user= $ARGV[3];
my $password= $ARGV[4];
my $dsn = "DBI:mysql:database=$database;host=$hostname;port=$port";
$dbh= DBI->connect($dsn, $user, $password);
$template->param(db => $database);
$template->param(dsn => $dsn);
}
my @releases = ({rel=>'4.1'},{rel=>'5.0'},{rel=>'5.1'});
$template->param(releases => \@releases);
my $tables = $dbh->selectall_arrayref("show tables");
my @table_size;
sub align {
my($to,@unaligned) = @_;
my @aligned;
foreach my $x (@unaligned) {
push @aligned, $to * POSIX::floor(($x+$to-1)/$to);
}
return @aligned;
}
foreach(@{$tables})
{
my $table= @{$_}[0];
my @columns;
my $info= $dbh->selectall_hashref("describe $table","Field");
my @count = $dbh->selectrow_array("select count(*) from $table");
my %columnsize; # used for index calculations
# We now work out the DataMemory usage
# sizes for 4.1, 5.0, 5.1
my @totalsize= (0,0,0);
foreach(keys %$info)
{
my @realsize = (0,0,0);
my $type;
my $size;
my $name= $_;
if($$info{$_}{Type} =~ /^(.*?)\((\d+)\)/)
{
$type= $1;
$size= $2;
}
else
{
$type= $$info{$_}{Type};
}
if($type =~ /tinyint/)
{@realsize=(1,1,1)}
elsif($type =~ /smallint/)
{@realsize=(2,2,2)}
elsif($type =~ /mediumint/)
{@realsize=(3,3,3)}
elsif($type =~ /bigint/)
{@realsize=(8,8,8)}
elsif($type =~ /int/)
{@realsize=(4,4,4)}
elsif($type =~ /float/)
{
if($size<=24)
{@realsize=(4,4,4)}
else
{@realsize=(8,8,8)}
}
elsif($type =~ /double/ || $type =~ /real/)
{@realsize=(8,8,8)}
elsif($type =~ /bit/)
{
my $a=($size+7)/8;
@realsize = ($a,$a,$a);
}
elsif($type =~ /datetime/)
{@realsize=(8,8,8)}
elsif($type =~ /timestamp/)
{@realsize=(4,4,4)}
elsif($type =~ /date/ || $type =~ /time/)
{@realsize=(3,3,3)}
elsif($type =~ /year/)
{@realsize=(1,1,1)}
elsif($type =~ /varchar/ || $type =~ /varbinary/)
{
my $fixed= 1+$size;
my @dynamic=$dbh->selectrow_array("select avg(length($name)) from $table");
$dynamic[0]=0 if !$dynamic[0];
@realsize= ($fixed,$fixed,ceil($dynamic[0]));
}
elsif($type =~ /binary/ || $type =~ /char/)
{@realsize=($size,$size,$size)}
elsif($type =~ /text/ || $type =~ /blob/)
{@realsize=(256,256,1)} # FIXME check if 5.1 is correct
@realsize= align(4,@realsize);
$totalsize[$_]+=$realsize[$_] foreach 0..$#totalsize;
my @realout;
push @realout,{val=>$_} foreach @realsize;
push @columns, {
name=>$name,
type=>$type,
size=>$size,
key=>$$info{$_}{Key},
datamemory=>\@realout,
};
$columnsize{$name}= \@realsize; # used for index calculations
}
# And now... the IndexMemory usage.
#
# Firstly, we assemble some information about the indexes.
# We use SHOW INDEX instead of using INFORMATION_SCHEMA so
# we can still connect to pre-5.0 mysqlds.
my %indexes;
{
my $sth= $dbh->prepare("show index from $table");
$sth->execute;
while(my $i = $sth->fetchrow_hashref)
{
$indexes{${%$i}{Key_name}}= {
type=>${%$i}{Index_type},
unique=>!${%$i}{Non_unique},
comment=>${%$i}{Comment},
} if !defined($indexes{${%$i}{Key_name}});
$indexes{${%$i}{Key_name}}{columns}[${%$i}{Seq_in_index}-1]=
${%$i}{Column_name};
}
}
if(!defined($indexes{PRIMARY})) {
$indexes{PRIMARY}= {
type=>'BTREE',
unique=>1,
comment=>'Hidden pkey created by NDB',
columns=>['HIDDEN_NDB_PKEY'],
};
push @columns, {
name=>'HIDDEN_NDB_PKEY',
type=>'bigint',
size=>8,
key=>'PRI',
datamemory=>[{val=>8},{val=>8},{val=>8}],
};
$columnsize{'HIDDEN_NDB_PKEY'}= [8,8,8];
}
my @IndexDataMemory= ({val=>0},{val=>0},{val=>0});
my @RowIndexMemory= ({val=>0},{val=>0},{val=>0});
my @indexes;
foreach my $index (keys %indexes) {
my $im41= 25;
$im41+=$columnsize{$_}[0] foreach @{$indexes{$index}{columns}};
my @im = ({val=>$im41},{val=>25},{val=>25});
my @dm = ({val=>10},{val=>10},{val=>10});
push @indexes, {
name=>$index,
type=>$indexes{$index}{type},
columns=>join(',',@{$indexes{$index}{columns}}),
indexmemory=>\@im,
datamemory=>\@dm,
};
$IndexDataMemory[$_]{val}+=$dm[$_]{val} foreach 0..2;
$RowIndexMemory[$_]{val}+=$im[$_]{val} foreach 0..2;
}
# total size + 16 bytes overhead
my @TotalDataMemory;
$TotalDataMemory[$_]{val}=$IndexDataMemory[$_]{val}+$totalsize[$_]+16 foreach 0..2;
my @RowDataMemory;
push @RowDataMemory,{val=>$_} foreach @totalsize;
my @RowPerPage;
push @RowPerPage,{val=>(floor((32768-128)/$TotalDataMemory[$_]{val}))} foreach 0..$#TotalDataMemory;
my @RowPerIndexPage;
push @RowPerIndexPage,{val=>(floor(8192/$RowIndexMemory[$_]{val}))} foreach 0..$#TotalDataMemory;
my @DataMemory;
push @DataMemory,{val=>ceil(($count[0]/($RowPerPage[$_]{val})))*32} foreach 0..$#RowPerPage;
my @IndexMemory;
push @IndexMemory,{val=>ceil(($count[0]/($RowPerIndexPage[$_]{val})))*8} foreach 0..$#RowPerPage;
my $count= $count[0];
my @counts;
$counts[$_]{val}= $count foreach 0..$#releases;
push @table_size, {
table=>$table,
indexes=>\@indexes,
columns=>\@columns,
count=>\@counts,
RowDataMemory=>\@RowDataMemory,
releases=>\@releases,
IndexDataMemory=>\@IndexDataMemory,
TotalDataMemory=>\@TotalDataMemory,
RowPerPage=>\@RowPerPage,
DataMemory=>\@DataMemory,
RowIndexMemory=>\@RowIndexMemory,
RowPerIndexPage=>\@RowPerIndexPage,
IndexMemory=>\@IndexMemory,
};
}
$template->param(tables => \@table_size);
print $template->output;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"/>
<meta name="keywords" content="MySQL Cluster" />
<title>MySQL Cluster size estimate for <TMPL_VAR NAME="db" ESCAPE="HTML"></title>
<style type="text/css">
table { border-collapse: collapse }
td,th { border: 1px solid black }
</style>
</head>
<body>
<h1>MySQL Cluster analysis for <TMPL_VAR NAME="db" escape="html"></h1>
<p>This is an automated analysis of the <TMPL_VAR NAME="DSN" escape="html"> database for migration into <a href="http://www.mysql.com/">MySQL</a> Cluster. No warranty is made to the accuracy of the information.</p>
<p>This information should be valid for MySQL 4.1</p>
<ul>
<TMPL_LOOP NAME="tables">
<li><TMPL_VAR NAME="table"></li>
</TMPL_LOOP>
</ul>
<hr/>
<TMPL_LOOP NAME="tables">
<h2><TMPL_VAR NAME="table"></h2>
<table>
<tr>
<th>Column</th>
<th>Type</th>
<th>Size</th>
<th>Key</th>
<TMPL_LOOP NAME=releases>
<th><TMPL_VAR NAME=rel> NDB Size</th>
</TMPL_LOOP>
</tr>
<TMPL_LOOP NAME="columns">
<tr>
<td><TMPL_VAR NAME=name></td>
<td><TMPL_VAR NAME=type></td>
<td><TMPL_VAR NAME=size></td>
<td><TMPL_VAR NAME=key></td>
<TMPL_LOOP NAME=datamemory>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
</TMPL_LOOP>
</table>
<p>&nbsp;</p>
<h3>Indexes</h3>
<p>We assume that indexes are ORDERED (not created USING HASH). If order is not required, 10 bytes of data memory can be saved per row if the index is created USING HASH</p>
<table>
<tr>
<th>Index</th>
<th>Type</th>
<th>Columns</th>
<TMPL_LOOP NAME=releases>
<th><TMPL_VAR NAME=rel> IdxMem</th>
</TMPL_LOOP>
<TMPL_LOOP NAME=releases>
<th><TMPL_VAR NAME=rel> DatMem</th>
</TMPL_LOOP>
</tr>
<TMPL_LOOP NAME="indexes">
<tr>
<td><TMPL_VAR NAME=name></td>
<td><TMPL_VAR NAME=type></td>
<td><TMPL_VAR NAME=columns></td>
<TMPL_LOOP NAME=indexmemory>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
<TMPL_LOOP NAME=datamemory>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
</TMPL_LOOP>
</table>
<h3>DataMemory Usage</h3>
<table>
<tr>
<th>&nbsp;</th>
<TMPL_LOOP NAME=releases>
<th><TMPL_VAR NAME=rel></th>
</TMPL_LOOP>
</tr>
<tr>
<th>Row Overhead</th>
<TMPL_LOOP NAME=releases>
<td>16</td>
</TMPL_LOOP>
</tr>
<tr>
<th>Column DataMemory/Row</th>
<TMPL_LOOP NAME=RowDataMemory>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr>
<th>Index DataMemory/Row</th>
<TMPL_LOOP NAME=IndexDataMemory>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr>
<th>Total DataMemory/Row</th>
<TMPL_LOOP NAME=TotalDataMemory>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr>
<th>Rows per 32kb page</th>
<TMPL_LOOP NAME=RowPerPage>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr>
<th>Current number of rows</th>
<TMPL_LOOP NAME=count>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr>
<th>Total DataMemory (kb)</th>
<TMPL_LOOP NAME=DataMemory>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
</table>
<h3>IndexMemory Usage</h3>
<table>
<tr>
<th>&nbsp;</th>
<TMPL_LOOP NAME=releases>
<th><TMPL_VAR NAME=rel></th>
</TMPL_LOOP>
</tr>
<tr>
<th>IndexMemory/Row</th>
<TMPL_LOOP NAME=RowIndexMemory>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr>
<th>Rows per 8kb page</th>
<TMPL_LOOP NAME=RowPerIndexPage>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr>
<th>Current number of rows</th>
<TMPL_LOOP NAME=count>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr>
<th>Total IndexMemory (kb)</th>
<TMPL_LOOP NAME=IndexMemory>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
</table>
<hr/>
</TMPL_LOOP>
<p>This is the output of ndb_size.pl.</p>
</body>
</html>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment