@stevedonkey Sorry for the late reply. I was on vacation last 2 weeks.
> is it well-known that document inserts/replaces on Debian/Ubuntu take like 40x longer than on Windows?
No, it’s not a known issue. We are now preparing release 4.2.0 and as part of that I’m testing INSERT performance and here’s what I get:
| version | concurrency | batch size | throughput (a_o off) | throughput (a_o on) |
| 4.2.0 | 16 | 1 | 15770 | 15744 |
| 4.2.0 | 16 | 1000 | 133561 | 133281 |
| 4.2.0 | 16 | 10000 | 336893 | 333826 |
| 4.2.0 | 16 | 25000 | 301163 | 298703 |
| 4.2.0 | 16 | 50000 | 285994 | 289957 |
| 4.2.0 | 16 | 100000 | 302276 | 301617 |
| 4.2.0 | 32 | 1 | 10762 | 10763 |
| 4.2.0 | 32 | 1000 | 135976 | 135099 |
| 4.2.0 | 32 | 10000 | 287260 | 284934 |
| 4.2.0 | 32 | 25000 | 246351 | 245730 |
| 4.2.0 | 32 | 50000 | 227208 | 231153 |
| 4.2.0 | 32 | 100000 | 223982 | 227042 |
`a_o` stands for `auto_optimize` (on by default since 4.0.2).
4.2.0 here is just the latest dev version (internally still versioned 4.0.3).
The script I’m using is:
root@perf3 ~/rt_load # cat test.php
#!/usr/bin/php
<?php
// Insert-throughput benchmark. Usage: test.php <batch size> <concurrency> <docs>
if (count($argv) < 4) die("Usage: ".__FILE__." <batch size> <concurrency> <docs>\n");
// Composer autoload: needed for Faker (fake document generation below).
require_once 'vendor/autoload.php';
// This function waits for an idle mysql connection for the $query, runs it and exits
// Dispatches $query asynchronously on an idle connection from the global pool.
// Fast path: if some connection has no in-flight request recorded in $requests,
// fire the query on it and return at once. Otherwise poll the pool until a
// connection finishes its previous query, reap that result, and reuse the
// connection for $query.
// Returns true once the query has been DISPATCHED (not completed); returns
// false if a polled connection reported a query error (dead connections are
// also removed from the pool in that case).
function process($query) {
global $all_links; // pool of mysqli connections, shared with the main script
global $requests;  // per-connection dispatch timestamps; a set entry marks the link as busy
// Fast path: first connection with no recorded in-flight request.
foreach ($all_links as $k=>$link) {
if (@$requests[$k]) continue;
mysqli_query($link, $query, MYSQLI_ASYNC);
@$requests[$k] = microtime(true);
return true;
}
// All connections busy: poll until one of them completes.
do {
// mysqli_poll() mutates its array arguments, so rebuild them every iteration.
$links = $errors = $reject = array();
foreach ($all_links as $link) {
$links[] = $errors[] = $reject[] = $link;
}
// Wait up to 1000 microseconds for any connection to become ready.
$count = @mysqli_poll($links, $errors, $reject, 0, 1000);
if ($count > 0) {
foreach ($links as $j=>$link) {
$res = @mysqli_reap_async_query($links[$j]);
// Recover the pool index $i of this link by identity, since
// mysqli_poll() reindexed $links and its keys no longer match the pool's.
foreach ($all_links as $i=>$link_orig) if ($all_links[$i] === $links[$j]) break;
if ($link->error) {
echo "ERROR: {$link->error}\n";
if (!mysqli_ping($link)) {
echo "ERROR: mysql connection is down, removing it from the pool\n";
unset($all_links[$i]); // remove the original link from the pool
unset($requests[$i]); // and from the $requests too
}
return false;
}
// NOTE(review): $link->error is always falsy at this point (the branch
// above returned otherwise), so this effectively skips links whose reap
// returned false without an error.
if ($res === false and !$link->error) continue;
if (is_object($res)) {
mysqli_free_result($res);
}
// Reuse the freed connection for the pending $query.
$requests[$i] = microtime(true);
mysqli_query($link, $query, MYSQLI_ASYNC); // making next query
return true;
}
};
} while (true);
return true;
}
// Connection pool and per-connection in-flight timestamps, shared with
// process() via `global`.
$all_links = [];
$requests = [];
// Open one mysqli connection per worker ($argv[2] = concurrency) to the local
// Manticore SQL endpoint on port 9306 (user/password/db are not used).
// Note: the original also initialized an unused `$c = 0;` here; it was dead
// code ($c is re-initialized right before the generation loop) and is removed.
for ($i=0;$i<$argv[2];$i++) {
$m = @mysqli_connect('127.0.0.1', '', '', '', 9306);
if (mysqli_connect_error()) die("Cannot connect to Manticore\n");
$all_links[] = $m;
}
// init: recreate the target table from scratch so every run starts empty
mysqli_query($all_links[0], "drop table if exists user");
mysqli_query($all_links[0], "create table user(name text, email string, description text, age int, active bit(1))");
$batch = [];
$query_start = "insert into user(id, name, email, description, age, active) values ";
$faker = Faker\Factory::create();
echo "preparing...\n";
// Generated batches are cached on disk, keyed by the query prefix, batch size
// and doc count, so repeated runs with the same parameters skip the (slow)
// Faker-based generation step entirely.
$cache_file_name = '/tmp/'.md5($query_start).'_'.$argv[1].'_'.$argv[3];
$csv_file_name = '/tmp/csv_'.$argv[3];
$c = 0;
if (!file_exists($cache_file_name) or !file_exists($csv_file_name)) {
$batches = [];
while ($c < $argv[3]) {
// One fake document: name, email, description, age, active flag.
$ar = [addslashes($faker->name()), addslashes($faker->email()), addslashes($faker->text()), rand(10,90), rand(0,1)];
// Mirror the same data into a CSV file (presumably consumed by other load
// tools — TODO confirm; it is not read by this script).
file_put_contents($csv_file_name, ($c+1).",".$ar[0].",".$ar[1].",".$ar[2].",".$ar[3].",".$ar[4]."\n", FILE_APPEND);
// id=0 lets the server auto-assign document ids.
$batch[] = "(0,'".$ar[0]."','".$ar[1]."','".$ar[2]."',".$ar[3].",".$ar[4].")";
$c++;
// Progress indicator every 1000 docs.
if ($c % 1000 == 0) echo "\r".($c/$argv[3]*100)."% ";
if (count($batch) == $argv[1]) {
$batches[] = $query_start.implode(',', $batch);
$batch = [];
}
}
// Flush the trailing partial batch only if it is non-empty. The original
// code appended it unconditionally, so whenever <docs> was an exact multiple
// of <batch size> an empty "insert ... values " statement (invalid SQL) was
// cached and later sent to the server.
if (count($batch) > 0) $batches[] = $query_start.implode(',', $batch);
file_put_contents($cache_file_name, serialize($batches));
} else {
echo "found in cache\n";
$batches = unserialize(file_get_contents($cache_file_name));
}
echo "querying...\n";
$t = microtime(true);
// Dispatch every batch; process() blocks until some connection is free.
foreach ($batches as $batch) {
if (!process($batch)) die("ERROR\n");
}
// Wait until all the workers finish: keep polling until every connection in
// the pool is reported back by mysqli_poll() as readable, errored or rejected.
do {
$links = $errors = $reject = array();
foreach ($all_links as $link) $links[] = $errors[] = $reject[] = $link;
$count = @mysqli_poll($links, $errors, $reject, 0, 100);
} while (count($all_links) != count($links) + count($errors) + count($reject));
echo "finished inserting\n";
// Sample the clock once so the printed total time and the docs/sec figure are
// derived from the same elapsed value (the original called microtime(true)
// twice, making the two reported numbers mutually inconsistent).
$elapsed = microtime(true) - $t;
echo "Total time: ".$elapsed."\n";
echo round($argv[3] / $elapsed)." docs per sec\n";
and bash:
for conc in 16 32; do for batch in 1000 10000 25000 50000 100000; do echo "conc $conc, batch $batch"; php test.php $batch $conc $(($conc*1000000)); echo ---; sleep 30; done; done; for conc in 16 32; do for batch in 1; do echo "conc $conc, batch $batch"; php test.php $batch $conc 100000; sleep 30; done; done;
to test different modes.
Can you give it a try and see what numbers it gives in Windows vs Linux in your case?