Skip to content

Commit 6d7be14

Browse files
bx80michalkleiner
andauthored
Add option to hint join order for generated queries (matomo-org#21636)
* Add join_prefix query hint to segment select queries * Added additional option to add query join hint to all generated queries, added tests * Reworked GeneralConfig for easier inheritance, added new descendant DatabaseConfig class to retrieve [database] settings * Move join query hint config settings to [database] section, adjusted queryVisitByDimension to allow sql testing, added new test * Fix for different ranking query return type * Further abstraction of GeneralConfig, fixed test config setting * Update tests/PHPUnit/Integration/DataAccess/LogAggregatorTest.php Co-authored-by: Michal Kleiner <[email protected]> * Update tests/PHPUnit/Integration/DataAccess/LogAggregatorTest.php Co-authored-by: Michal Kleiner <[email protected]> * Update config/global.ini.php Co-authored-by: Michal Kleiner <[email protected]> --------- Co-authored-by: Michal Kleiner <[email protected]>
1 parent 7f1bdb2 commit 6d7be14

7 files changed

Lines changed: 318 additions & 38 deletions

File tree

config/global.ini.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@
4949
;
5050
;ignore_error_codes[] = 1105
5151

52+
; Add a query hint for the order of joined tables when building segment queries in MySQL. This can be used to override
53+
; sub-optimal choices by the MySQL optimizer and always ensure the query plan starts with the first table in the query.
54+
enable_segment_first_table_join_prefix = 0
55+
56+
; Add a query hint for the order of joined tables for all log table queries in MySQL.
57+
enable_first_table_join_prefix = 0
58+
5259
; If configured, the following queries will be executed on the reader instead of the writer.
5360
; * archiving queries that hit a log table
5461
; * live queries that hit a log table

core/Config/DatabaseConfig.php

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?php
2+
/**
3+
* Matomo - free/libre analytics platform
4+
*
5+
* @link https://matomo.org
6+
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7+
*
8+
*/
9+
namespace Piwik\Config;
10+
11+
class DatabaseConfig extends SectionConfig
12+
{
13+
14+
public static function getSectionName(): string
15+
{
16+
return 'database';
17+
}
18+
19+
}

core/Config/GeneralConfig.php

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,41 +8,12 @@
88
*/
99
namespace Piwik\Config;
1010

11-
use Piwik\Config;
12-
13-
class GeneralConfig
11+
class GeneralConfig extends SectionConfig
1412
{
15-
/**
16-
* Update Archive config
17-
*
18-
* @param string $name Setting name
19-
* @param mixed $value Value
20-
*/
21-
public static function setConfigValue($name, $value)
22-
{
23-
$section = self::getConfig();
24-
$section[$name] = $value;
25-
Config::getInstance()->General = $section;
26-
}
27-
28-
public static function getConfigValue($name, $idSite = null)
29-
{
30-
$config = self::getConfig();
31-
if (!empty($idSite)) {
32-
$siteSpecificConfig = self::getSiteSpecificConfig($idSite);
33-
$config = array_merge($config, $siteSpecificConfig);
34-
}
35-
return $config[$name] ?? null;
36-
}
3713

38-
private static function getConfig()
14+
public static function getSectionName(): string
3915
{
40-
return Config::getInstance()->General;
16+
return 'General';
4117
}
4218

43-
private static function getSiteSpecificConfig($idSite)
44-
{
45-
$key = 'General_' . $idSite;
46-
return Config::getInstance()->$key;
47-
}
4819
}

core/Config/SectionConfig.php

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
<?php
2+
/**
3+
* Matomo - free/libre analytics platform
4+
*
5+
* @link https://matomo.org
6+
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7+
*
8+
*/
9+
namespace Piwik\Config;
10+
11+
use Piwik\Config;
12+
13+
abstract class SectionConfig
14+
{
15+
16+
public abstract static function getSectionName(): string;
17+
18+
/**
19+
* Set the value for a setting
20+
*
21+
* @param string $name Setting name
22+
* @param mixed $value Value
23+
*
24+
* @return void
25+
*/
26+
public static function setConfigValue(string $name, $value): void
27+
{
28+
$section = self::getConfig();
29+
$section[$name] = $value;
30+
Config::getInstance()->{static::getSectionName()} = $section;
31+
}
32+
33+
/**
34+
* Get a setting value
35+
*
36+
* @param string $name Setting name
37+
* @param int|null $idSite Optional site Id
38+
*
39+
* @return mixed|null
40+
*/
41+
public static function getConfigValue(string $name, ?int $idSite = null)
42+
{
43+
$config = self::getConfig();
44+
if (!empty($idSite)) {
45+
$siteSpecificConfig = self::getSiteSpecificConfig($idSite);
46+
$config = array_merge($config, $siteSpecificConfig);
47+
}
48+
return $config[$name] ?? null;
49+
}
50+
51+
/**
52+
* Get the section config as an array
53+
*
54+
* @return array|string
55+
*/
56+
private static function getConfig()
57+
{
58+
return Config::getInstance()->{static::getSectionName()};
59+
}
60+
61+
/**
62+
* Get the site specific config (if any) as an array
63+
*
64+
* @param int $idSite
65+
*
66+
* @return array|string
67+
*/
68+
private static function getSiteSpecificConfig(int $idSite)
69+
{
70+
$key = static::getSectionName() . '_' . $idSite;
71+
return Config::getInstance()->$key;
72+
}
73+
}

core/DataAccess/LogAggregator.php

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
use Piwik\ArchiveProcessor\Parameters;
1212
use Piwik\Common;
1313
use Piwik\Config;
14+
use Piwik\Config\DatabaseConfig;
1415
use Piwik\Container\StaticContainer;
1516
use Piwik\DataArray;
1617
use Piwik\Date;
@@ -385,6 +386,10 @@ public function generateQuery($select, $from, $where, $groupBy, $orderBy, $limit
385386

386387
if (is_array($query) && array_key_exists('sql', $query)) {
387388
$query['sql'] = DbHelper::addOriginHintToQuery($query['sql'], $this->queryOriginHint, $this->dateStart, $this->dateEnd, $this->sites, $this->segment);
389+
if (DatabaseConfig::getConfigValue('enable_first_table_join_prefix'))
390+
{
391+
$query['sql'] = DbHelper::addJoinPrefixHintToQuery($query['sql'], (is_array($from) ? reset($from) : $from));
392+
}
388393
}
389394

390395
return $query;
@@ -401,20 +406,40 @@ public function generateQuery($select, $from, $where, $groupBy, $orderBy, $limit
401406
private function createSegmentTable(): string
402407
{
403408
$segmentTable = $this->getSegmentTmpTableName();
409+
$segmentSql = $this->getSegmentTableSql();
410+
411+
$this->createTemporaryTable($segmentTable, $segmentSql['sql'], $segmentSql['bind']);
412+
413+
return $segmentTable;
414+
}
415+
416+
/**
417+
* Return the SQL query used to populate the segment temporary table
418+
*
419+
* @return array
420+
* @throws \Piwik\Exception\DI\DependencyException
421+
* @throws \Piwik\Exception\DI\NotFoundException
422+
*/
423+
public function getSegmentTableSql(): array
424+
{
404425
$segmentWhere = $this->getWhereStatement('log_visit', 'visit_last_action_time');
405426
$segmentBind = $this->getGeneralQueryBindParams();
406427

407428
$logQueryBuilder = StaticContainer::get('Piwik\DataAccess\LogQueryBuilder');
408429
$forceGroupByBackup = $logQueryBuilder->getForcedInnerGroupBySubselect();
409430
$logQueryBuilder->forceInnerGroupBySubselect(LogQueryBuilder::FORCE_INNER_GROUP_BY_NO_SUBSELECT);
410431
$segmentSql = $this->segment->getSelectQuery('distinct log_visit.idvisit as idvisit', 'log_visit', $segmentWhere, $segmentBind, 'log_visit.idvisit ASC');
411-
$logQueryBuilder->forceInnerGroupBySubselect($forceGroupByBackup);
412432

413-
$this->createTemporaryTable($segmentTable, $segmentSql['sql'], $segmentSql['bind']);
433+
if (is_array($segmentSql) && array_key_exists('sql', $segmentSql)) {
434+
if (DatabaseConfig::getConfigValue('enable_segment_first_table_join_prefix')) {
435+
$segmentSql['sql'] = DbHelper::addJoinPrefixHintToQuery($segmentSql['sql'], 'log_visit');
436+
}
437+
}
414438

415-
return $segmentTable;
416-
}
439+
$logQueryBuilder->forceInnerGroupBySubselect($forceGroupByBackup);
417440

441+
return $segmentSql;
442+
}
418443

419444
protected function getVisitsMetricFields()
420445
{
@@ -553,9 +578,39 @@ public function getMetricsFromVisitByDimension($dimension)
553578
* ranking query SQL will be immediately executed and the results returned.
554579
* @api
555580
*/
556-
public function queryVisitsByDimension(array $dimensions = array(), $where = false, array $additionalSelects = array(),
581+
public function queryVisitsByDimension(array $dimensions = [], $where = false, array $additionalSelects = [],
557582
$metrics = false, $rankingQuery = false, $orderBy = false, $timeLimitInMs = -1,
558583
$rankingQueryGenerate = false)
584+
{
585+
$query = $this->getQueryByDimensionSql($dimensions, $where, $additionalSelects, $metrics, $rankingQuery, $orderBy,
586+
$timeLimitInMs, $rankingQueryGenerate);
587+
588+
// Ranking queries will return the data directly
589+
if ($rankingQuery && !$rankingQueryGenerate) {
590+
return $query;
591+
}
592+
593+
return $this->getDb()->query($query['sql'], $query['bind']);
594+
}
595+
596+
/**
597+
* Build the sql query used to query dimension data
598+
*
599+
* @param array $dimensions
600+
* @param bool|string $where
601+
* @param array $additionalSelects
602+
* @param bool|array $metrics
603+
* @param bool|\Piwik\RankingQuery $rankingQuery
604+
* @param bool|string $orderBy
605+
* @param int $timeLimitInMs
606+
* @param bool $rankingQueryGenerate
607+
*
608+
* @return array
609+
* @throws \Piwik\Exception\DI\DependencyException
610+
* @throws \Piwik\Exception\DI\NotFoundException
611+
*/
612+
public function getQueryByDimensionSql(array $dimensions, $where, array $additionalSelects, $metrics, $rankingQuery,
613+
$orderBy, $timeLimitInMs, $rankingQueryGenerate): array
559614
{
560615
$tableName = self::LOG_VISIT_TABLE;
561616
$availableMetrics = $this->getVisitsMetricFields();
@@ -600,7 +655,7 @@ public function queryVisitsByDimension(array $dimensions = array(), $where = fal
600655

601656
$query['sql'] = DbHelper::addMaxExecutionTimeHintToQuery($query['sql'], $timeLimitInMs);
602657

603-
return $this->getDb()->query($query['sql'], $query['bind']);
658+
return $query;
604659
}
605660

606661
protected function getSelectsMetrics($metricsAvailable, $metricsRequested = false)

core/DbHelper.php

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,29 @@ public static function addOriginHintToQuery(string $sql, string $origin, ?Date $
352352
return $sql;
353353
}
354354

355+
/**
356+
* Add an optimizer hint to the query to set the first table used by the MySQL join execution plan
357+
*
358+
* https://dev.mysql.com/doc/refman/8.0/en/optimizer-hints.html#optimizer-hints-join-order
359+
*
360+
* @param string $sql SQL query string
361+
* @param string $prefix Table prefix to be used as the first table in the plan
362+
*
363+
* @return string Modified query string with hint added
364+
*/
365+
public static function addJoinPrefixHintToQuery(string $sql, string $prefix): string
366+
{
367+
if (strpos(trim($sql), '/*+ JOIN_PREFIX(') === false) {
368+
$select = 'SELECT';
369+
if (0 === strpos(trim($sql), $select)) {
370+
$sql = trim($sql);
371+
$sql = 'SELECT /*+ JOIN_PREFIX('.$prefix.') */'.substr($sql, strlen($select));
372+
}
373+
}
374+
375+
return $sql;
376+
}
377+
355378
/**
356379
* Returns true if the string is a valid database name for MySQL. MySQL allows + in the database names.
357380
* Database names that start with a-Z or 0-9 and contain a-Z, 0-9, underscore(_), dash(-), plus(+), and dot(.) will be accepted.

0 commit comments

Comments
 (0)