You can have multiple BaseX servers reading and writing to a single set of databases.
For example, in my web application I have five servers running: 1 to satisfy the web pages and 4 to perform queries. I had to implement my redirector to direct REST requests to the least-loaded server—maybe
there’s a better way to do that that I’m not aware of?
I also depend heavily on indexes I build over my content to support specific queries (in my case, link where-used information) and make heavy use of attribute and token indexes.
Tamara Marnell recommends generating results on the server and then signalling the client (or responding to polls from the client) to return the result, avoiding having long-lived client-to-server HTTP connections—I
haven’t had a chance to implement this approach, but I think it will help a lot once I can get to it.
Here’s my code (10.7) to get the least-loaded server port, which is used from my REST handler. If there’s a better way to do this with BaseX 10 I don’t know what it is—I put this together quickly to resolve
an immediate performance issue. This is running on CentOS Linux on an 8-core machine, so not the beefiest but it’s what I can get quickly from our internal IT group.
The REST handler handles an incoming URL and redirects it to the appropriate server port:
(:~
 : REST entry point for GET /now/rest/api.
 :
 : A request that arrives on the web server port is redirected to a worker
 : via now:redirectRestGet(). A request that arrives on any other port is
 : answered directly with the list of family database names, sorted in
 : descending order.
 :
 : @return either the result of now:redirectRestGet() or a <mirabel> element
 :)
declare
  %rest:GET
  %rest:path('/now/rest/api')
  %output:method('xml')
function now:root() as item()* {
  let $port as xs:integer := request:port()
  let $trace := prof:dump(``[REST API Handler: Starting. Port `{$port}`]``)
  let $onWebServer := $port eq $now:webserverPort
  return
    if ($onWebServer) then
      now:redirectRestGet()
    else
      <mirabel timestamp="{current-dateTime()}">
        <databases>{
          for $name in util:getFamilyDatabaseNames()
          order by $name descending
          return <database name="{$name}"/>
        }</databases>
      </mirabel>
};
The redirectRestGet() function (there’s a corresponding updating version for POST requests):
(:~
 : Answer a GET request with a redirect to a back-end worker.
 :
 : The target URI comes from now:getRedirectionTargetURI(); it is logged
 : and then handed to web:redirect().
 :
 : @return the result of web:redirect() for the chosen worker URI
 :)
declare function now:redirectRestGet() as item()* {
  let $target as xs:string := now:getRedirectionTargetURI()
  (: Bound only for its logging side effect; the value is not used. :)
  let $logged := util:logToLog(
    'now:redirectRestGet',
    ``[Redirecting to "`{$target}`"]``
  )
  return web:redirect($target)
};
(:~
 : Build the URI that the current request should be redirected to.
 :
 : The scheme, host name, path and query string are taken from the current
 : request; the port is the selected worker port plus $now:portOffset.
 :
 : @return the redirection target as a string
 :)
declare function now:getRedirectionTargetURI() as xs:string {
  let $targetPort := now:getWorkerPort() + $now:portOffset
  let $queryString as xs:string? := request:query()
  (: Only append "?query" when the request actually had a query string. :)
  let $querySuffix :=
    if (empty($queryString)) then () else '?' || $queryString
  return
    concat(
      request:scheme(), '://',
      request:hostname(), ':', $targetPort,
      request:path(),
      $querySuffix
    )
};
(:~
 : Get the port of the least-loaded worker.
 :
 : The ports listed in $portsToAvoid (the web server's own server port) are
 : removed BEFORE the lowest CPU value is computed. Computing the minimum
 : over all servers and filtering afterwards yields an empty candidate list
 : whenever the web server itself happens to be the least-loaded process.
 :
 : If several workers share the lowest CPU value, one of them is picked at
 : random.
 :
 : @return the selected worker port, or 9984 if the server information
 :         could not be obtained
 :)
declare function now:getWorkerPort() as xs:integer {
  let $infos as map(*) := status:getServerInfos()
  let $portsToAvoid as xs:integer* := ($now:webserverPort - $now:portOffset)
  return
    if (exists($infos?error))
    then
      let $msg := util:logToLog('now:redirectRestGet', ``[Error getting server infos: `{$infos?error}`]``)
      return 9984 (: This is a guess. Need to more reliably configure the base worker port. :)
    else
      (: Only servers that are allowed to receive redirected requests. :)
      let $workers as map(*)* := $infos?data?*[not(xs:integer(?port) = $portsToAvoid)]
      let $lowestCPU as xs:decimal? := min($workers?pcpu ! xs:decimal(.))
      let $candPorts as xs:integer* :=
        $workers[xs:decimal(?pcpu) eq $lowestCPU]?port ! xs:integer(.)
      return
        if (empty($candPorts))
        then (
          (: No eligible worker was reported at all. Wait 2 seconds and try
             again. Note that this retries for as long as no worker shows up. :)
          prof:dump(``[[WARN] now:getWorkerPort(): Got empty $candPorts list. Recursing...]``),
          prof:sleep(2000),
          now:getWorkerPort()
        )
        else if (count($candPorts) gt 1)
        then $candPorts[(random-number-generator()?permute(1 to count($candPorts)))[1]]
        else $candPorts[1]
};
And here’s my server-status module that implements getServerInfos():
(:~
 : Gets server status information
 :)
module namespace status = "http://servicenow.com/xquery/module/server-status";
(:~
 : Report whether the `uname` command prints "Darwin" on this machine.
 :
 : @return true if the whitespace-normalized output of `uname` is 'Darwin'
 :)
declare function status:isMacos() as xs:boolean {
  normalize-space(proc:execute('uname')/output/text()) eq 'Darwin'
};
(:~
 : Get the CPU usage percentage for the specified process IDs using `top`.
 :
 : The PID list is validated before `top` is started, so no external process
 : is spawned when there is nothing to look up.
 :
 : @param $pids The process IDs to get the percentages for
 : @return map with either an 'error' entry or a 'data' entry:
 :
 :   map{
 :     'error' : $errorMessage,
 :     'data' : map{
 :       $pid : map{
 :         'pid' : $pid,
 :         'pcpu': $percent
 :       }
 :     }
 :   }
 :)
declare function status:getCpuPercentForPIDs($pids as xs:string*) as map(*) {
  if (empty($pids))
  then
    map{
      'error' : 'No PIDs provided'
    }
  else
    let $isMacos as xs:boolean := status:isMacos()
    let $cmd as xs:string := 'top'
    (: Get the data for each of the PIDs, using batch mode and 1 iteration: :)
    let $parameters as xs:string* :=
      if ($isMacos)
      then ('-l', '1', '-s', '0', '-stats', 'pid,cpu')
      else (($pids ! ('-p', .)), '-b', '-n', '1')
    let $cmdResult as element() := proc:execute($cmd, $parameters)
    return
      if (exists($cmdResult/error))
      then
        map{
          'error' : $cmdResult/error/text()
        }
      else
        (: The linux version of top doesn't have the equivalent of the macOS
           -stats parameter, so we get everything, e.g.:
           25176 eliot.k+ 20 0 8076608 2.1g 21352 S 0.0 13.7 132:26.69 java :)
        let $matchPattern as xs:string := ``[^(`{$pids => string-join('|')}`)\s.+]``
        (: Keep only the lines that start with one of the requested PIDs.
           Header lines and blank lines do not match the pattern. :)
        let $lines as xs:string* :=
          (tokenize($cmdResult/output/text(), '\n') ! normalize-space(.))
          [matches(., $matchPattern)]
        return
          map{
            'data' :
              map:merge(
                for $line in $lines
                let $tokens as xs:string* := tokenize($line, '\s+')
                let $pid as xs:string? := $tokens[1]
                (: macOS output has only pid and cpu; on Linux the CPU
                   percentage is the 9th column. :)
                let $percent as xs:string := if ($isMacos) then $tokens[2] else $tokens[9]
                return
                  map{
                    $pid :
                      map{
                        'pid' : $pid,
                        'pcpu' : $percent
                      }
                  }
              )
          }
};
(:~
 : Get a map of server port to PID maps, one entry per running BaseXHTTP
 : process found in the process list (`pgrep` on macOS, `ps` elsewhere).
 :
 : The port is the first token following ' -p' on the process command line;
 : both '-p9984' and '-p 9984' are understood. Processes whose command line
 : has no such token are skipped.
 :
 : @return A map with either an 'error' entry or a 'data' entry:
 :
 :   map{
 :     'error' : "error message",
 :     'data' : map{
 :       $port : map{
 :         'port' : $port,
 :         'pid' : $pid,
 :         'pcpu' : '-1.0'
 :       }
 :     }
 :   }
 :)
declare function status:getServerPIDInfos() as map(*) {
  let $isMacos as xs:boolean := status:isMacos()
  let $cmd as xs:string := if ($isMacos) then 'pgrep' else 'ps'
  let $parameters as xs:string+ :=
    if ($isMacos)
    then ('-lf', 'BaseX')
    else ('-e', '-o', 'pid,command')
  let $cmdResult as element() := proc:execute($cmd, $parameters)
  return
    if (exists($cmdResult/error))
    then
      map{
        'error' : $cmdResult/error/text()
      }
    else
      (: Each line is expected to be "<pid> <command line>"; only the
         BaseXHTTP processes are of interest. :)
      let $lines as xs:string* :=
        tokenize($cmdResult/output/text(), '\n')[contains(., 'BaseXHTTP')]
      return
        map{
          'data' :
            map:merge(
              for $line in $lines
              (: One-argument tokenize() trims and splits on whitespace. :)
              let $pid as xs:string := tokenize($line)[1]
              let $port as xs:string? := tokenize(substring-after($line, ' -p'))[1]
              where exists($port)
              return
                map{
                  $port :
                    map{
                      'port' : $port,
                      'pid' : $pid,
                      (: Placeholder until the CPU percentage is filled in. :)
                      'pcpu' : '-1.0'
                    }
                }
            )
        }
};
(:~
 : Get a map of server information maps with CPU percentages, one for each
 : running BaseX server.
 :
 : If the process list cannot be obtained, the error map from
 : status:getServerPIDInfos() is returned unchanged. If the CPU lookup fails
 : or has no entry for a PID, that server keeps the placeholder value '-1.0'.
 :
 : @return A map with either an 'error' entry or a 'data' entry:
 :
 :   map{
 :     'error' : "error message",
 :     'data' : map{
 :       $port : map{
 :         'port' : $port,
 :         'pid' : $pid,
 :         'pcpu' : $cpuPercent
 :       }
 :     }
 :   }
 :)
declare function status:getServerInfos() as map(*) {
  let $serverInfo as map(*) := status:getServerPIDInfos()
  return
    if (exists($serverInfo?error))
    then $serverInfo
    else
      let $data as map(*) := $serverInfo?data
      let $pids as xs:string* := $data?*?pid
      let $cpuData as map(*)? := status:getCpuPercentForPIDs($pids)
      return
        map{
          'data' :
            map:merge(
              for $serverMap as map(*)? in $data?*
              let $pid := $serverMap?pid
              (: Falls back to '-1.0' when there is no CPU data for this PID. :)
              let $cpuPercent as xs:string := ($cpuData?data?($pid)?pcpu, '-1.0')[1]
              return
                map{
                  $serverMap?port :
                    map:put($serverMap, 'pcpu', $cpuPercent)
                }
            )
        }
};
Cheers,
Eliot
_____________________________________________
Eliot Kimber
Sr Staff Content Engineer
Digital Content & Design
O: 512 554 9368
M: 512 554 9368