Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ add_library(
${SOURCE_DIR}/MemoryBank.cxx
${SOURCE_DIR}/MemoryBankManager.cxx
${SOURCE_DIR}/MemoryPagesPool.cxx
${SOURCE_DIR}/ReadoutMonitoringQueue.cxx
$<$<BOOL:${ZMQ_FOUND}>:${SOURCE_DIR}/ZmqServer.cxx>
$<$<BOOL:${ZMQ_FOUND}>:${SOURCE_DIR}/ZmqClient.cxx>
)
Expand Down
3 changes: 2 additions & 1 deletion doc/configurationParameters.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ The parameters related to 3rd-party libraries are described here for convenience
| equipment-* | saveErrorPagesMax | int | 0 | If set, pages found with data error are saved to disk up to given maximum. |
| equipment-* | saveErrorPagesPath | string | | Path where to save data pages with errors (when feature enabled). |
| equipment-* | stopOnError | int | 0 | If 1, readout will stop automatically on equipment error. |
| equipment-* | TFperiod | int | 128 | Duration of a timeframe, in number of LHC orbits. |
| equipment-* | TFperiod | int | 32 | Duration of a timeframe, in number of LHC orbits. |
| equipment-* | verbose | int | 0 | If set, extra debug messages may be logged. |
| equipment-cruemulator-* | cruBlockSize | int | 8192 | Size of a RDH block. |
| equipment-cruemulator-* | cruId | int | 0 | CRU Id, used for CRU Id field in RDH. |
Expand Down Expand Up @@ -194,6 +194,7 @@ The parameters related to 3rd-party libraries are described here for convenience
| equipment-rorc-* | firmwareCheckEnabled | int | 1 | If set, RORC driver checks compatibility with detected firmware. Use 0 to bypass this check (eg new fw version not yet recognized by ReadoutCard version). |
| equipment-rorc-* | firmwareVersionsAllowed | string | | Comma-separated list of ROC firmware versions allowed (6-digit hash). If empty, all are allowed. |
| equipment-rorc-* | firmwareVersionsDenied | string | e4a5a46e | Comma-separated list of ROC firmware versions denied (6-digit hash), i.e. which would cause configuration to abort. |
| equipment-rorc-* | monitorFirstOrbitEnabled | int | 0 | If set, enable monitoring of RORC first orbit. |
| equipment-zmq-* | address | string | | Address of remote server to connect, eg tcp://remoteHost:12345. |
| equipment-zmq-* | mode | string | stream | Possible values: stream (1 input ZMQ message = 1 output data page), snapshot (last ZMQ message = one output data page per TF). |
| equipment-zmq-* | timeframeClientUrl | string | | The address to be used to retrieve current timeframe. When set, data is published only once for each TF id published by remote server. |
Expand Down
6 changes: 6 additions & 0 deletions doc/releaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -655,3 +655,9 @@ This file describes the main feature changes for each readout.exe released versi

## v2.27.1 - 12/02/2025
- Path of configuration file from which defaults are loaded on startup has been changed to /etc/o2.d/readout/readout-defaults.cfg (it was in /etc/o2.d before, now changing to standard subdirectory location).

## v2.27.2 - 19/03/2025
- Updated configuration parameters:
- equipment.TFperiod is now set to 32 by default, instead of 128 previously. This is the duration of a timeframe, in number of LHC orbits. The new value corresponds to what is used in production.
- equipment-rorc-*: added parameter monitorFirstOrbitEnabled, which enables reporting the metric "readout.RORCfirstOrbit" to the monitoring system on startup, as retrieved from the CRU firmware. This is to be used on the CTP FLP to detect a possibly wrong orbit. A returned value of 0xFFFFFFFF indicates that the value could not be retrieved.
- ConsumerStats: added an internal queue which allows any readout module to push spontaneous monitoring measurements (as opposed to periodic ones).
7 changes: 7 additions & 0 deletions src/ConsumerStats.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "DataSet.h"
#include "ReadoutUtils.h"
#include "ReadoutStats.h"
#include "ReadoutMonitoringQueue.h"

using namespace o2::monitoring;

Expand Down Expand Up @@ -182,6 +183,12 @@ class ConsumerStats : public Consumer
sendMetricNoException(Metric{"readout.bufferUsage"}.addValue((int)(r*100), "value").addValue(b, "bytes").addTag(tags::Key::ID, i));
}
}

// publish measurements stored in monitoring queue
auto ff = [&] (const ReadoutMonitoringMetric &m) -> void {
sendMetricNoException(Metric{m.value, m.name}.addTag(tags::Key::ID, m.tag));
};
gReadoutMonitoringQueue.execute(ff);
}

#ifdef WITH_ZMQ
Expand Down
4 changes: 2 additions & 2 deletions src/ReadoutEquipment.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ ReadoutEquipment::ReadoutEquipment(ConfigFile& cfg, std::string cfgEntryPoint, b
cfg.getOptionalValue<int>(cfgEntryPoint + ".verbose", cfgVerbose);

if (!cfgDisableTimeframes) {
// configuration parameter: | equipment-* | TFperiod | int | 128 | Duration of a timeframe, in number of LHC orbits. |
int cfgTFperiod = 128;
// configuration parameter: | equipment-* | TFperiod | int | 32 | Duration of a timeframe, in number of LHC orbits. |
int cfgTFperiod = 32;
cfg.getOptionalValue<int>(cfgEntryPoint + ".TFperiod", cfgTFperiod);
timeframePeriodOrbits = cfgTFperiod;

Expand Down
2 changes: 1 addition & 1 deletion src/ReadoutEquipment.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ class ReadoutEquipment

//const unsigned int LHCBunches = 3564; // number of bunches in LHC
const unsigned int LHCOrbitRate = 11246; // LHC orbit rate, in Hz. 299792458 / 26659
uint32_t timeframePeriodOrbits = 128; // timeframe interval duration in number of LHC orbits
uint32_t timeframePeriodOrbits = 32; // timeframe interval duration in number of LHC orbits
double timeframeRate = 0; // timeframe rate, when generated internally

// RDH-related configuration parameters
Expand Down
10 changes: 10 additions & 0 deletions src/ReadoutEquipmentRORC.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "ReadoutEquipment.h"
#include "ReadoutUtils.h"
#include "readoutInfoLogger.h"
#include "ReadoutMonitoringQueue.h"

class ReadoutEquipmentRORC : public ReadoutEquipment
{
Expand Down Expand Up @@ -131,6 +132,10 @@ ReadoutEquipmentRORC::ReadoutEquipmentRORC(ConfigFile& cfg, std::string name) :
// configuration parameter: | equipment-rorc-* | debugStatsEnabled | int | 0 | If set, enable extra statistics about internal buffers status. (printed to stdout when stopping) |
cfg.getOptionalValue<int>(name + ".debugStatsEnabled", cfgDebugStatsEnabled);

// configuration parameter: | equipment-rorc-* | monitorFirstOrbitEnabled | int | 0 | If set, enable monitoring of RORC first orbit. |
int cfgMonitorFirstOrbitEnabled = 0;
cfg.getOptionalValue<int>(name + ".monitorFirstOrbitEnabled", cfgMonitorFirstOrbitEnabled);

// get readout memory buffer parameters
// std::string sMemorySize=cfg.getValue<std::string>(name + ".memoryBufferSize");
// std::string sPageSize=cfg.getValue<std::string>(name + ".memoryPageSize"); long long
Expand Down Expand Up @@ -216,6 +221,11 @@ ReadoutEquipmentRORC::ReadoutEquipmentRORC(ConfigFile& cfg, std::string name) :
BOOST_THROW_EXCEPTION(ReadoutEquipmentRORCException() << ErrorInfo::Message("This firmware version is not allowed"));
}

// publish relevant card info
if (cfgMonitorFirstOrbitEnabled) {
gReadoutMonitoringQueue.push({.name = "readout.RORCfirstOrbit", .tag = id, .value = (uint64_t)channel->getCounterFirstOrbit()});
}

// todo: log parameters ?

if (logRocCallsEnable) {
Expand Down
36 changes: 36 additions & 0 deletions src/ReadoutMonitoringQueue.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include "ReadoutMonitoringQueue.h"

// Nothing specific to set up: the constructor body is empty.
ReadoutMonitoringQueue::ReadoutMonitoringQueue() = default;

// Nothing specific to release: the destructor body is empty.
ReadoutMonitoringQueue::~ReadoutMonitoringQueue() = default;

// Store a metric in the queue, under the queue mutex.
// New entries are inserted at the front of the deque.
void ReadoutMonitoringQueue::push(ReadoutMonitoringMetric m) {
  const std::lock_guard<std::mutex> guard(qMutex);
  q.emplace_front(std::move(m));
}

void ReadoutMonitoringQueue::execute(std::function<void(const ReadoutMonitoringMetric &)> f) {
for (;;) {
ReadoutMonitoringMetric m;

{
std::unique_lock<std::mutex> lock(qMutex);
if (q.empty()) {
break;
}
m = std::move(q.back());
q.pop_back();
}

f(m);
}
}

// Drop every element currently stored in the queue, under the queue mutex.
void ReadoutMonitoringQueue::clear() {
  const std::lock_guard<std::mutex> guard(qMutex);
  q.clear();
}

// Definition of the global monitoring queue instance (declared extern in ReadoutMonitoringQueue.h).
ReadoutMonitoringQueue gReadoutMonitoringQueue;
42 changes: 42 additions & 0 deletions src/ReadoutMonitoringQueue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#pragma once

#include <cstdint>
#include <deque>
#include <functional>
#include <mutex>
#include <string>

// A metric to be stored in the queue for later processing.
// Fields are as for o2::monitoring::Metric.
// Numeric fields are zero-initialized, so that a default-constructed metric has a defined content.
// NB: keep the member order unchanged, callers may rely on it for (designated) aggregate initialization.
struct ReadoutMonitoringMetric {
  std::string name;           // metric name
  unsigned short int tag = 0; // tag associated to the metric
  uint64_t value = 0;         // metric value
};


// Producer-consumer queue used to define and publish metrics.
// Typical use:
// - the module that pushes has no access to o2 Monitoring;
// - the module that publishes reads the metrics from the queue and publishes them to o2 Monitoring.
// The class is not aware of o2::monitoring: it is just a transient thread-safe storage.
class ReadoutMonitoringQueue {
 public:
  ReadoutMonitoringQueue();
  ~ReadoutMonitoringQueue();

  // Push an element into the queue.
  void push(ReadoutMonitoringMetric metric);

  // Execute the provided function on all the elements in the queue
  // (and remove them from the queue).
  void execute(std::function<void(const ReadoutMonitoringMetric&)> callback);

  // Remove all the elements in the queue.
  void clear();

 private:
  std::mutex qMutex;                     // lock guarding the queue
  std::deque<ReadoutMonitoringMetric> q; // the stored metrics
};

// Global monitoring queue instance, defined in ReadoutMonitoringQueue.cxx.
extern ReadoutMonitoringQueue gReadoutMonitoringQueue;
2 changes: 1 addition & 1 deletion src/ReadoutVersion.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

#define READOUT_VERSION "2.27.1"
#define READOUT_VERSION "2.27.2"

5 changes: 5 additions & 0 deletions src/mainReadout.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
#include "ReadoutVersion.h"
#include "TtyChecker.h"
#include "ReadoutConst.h"
#include "ReadoutMonitoringQueue.h"

#ifdef WITH_NUMA
#include <numa.h>
Expand Down Expand Up @@ -573,6 +574,7 @@ int Readout::_init(int argc, char* argv[])
sigaction(SIGTERM, &signalSettings, NULL);
sigaction(SIGQUIT, &signalSettings, NULL);
sigaction(SIGINT, &signalSettings, NULL);
signal(SIGPIPE, SIG_IGN);

// log startup and options
theLog.log(LogInfoSupport_(3001), "Readout " READOUT_VERSION " - process starting, pid %d for role %s", getpid(), occRole.c_str());
Expand Down Expand Up @@ -773,6 +775,9 @@ int Readout::_configure(const boost::property_tree::ptree& properties)
// reset some flags
gReadoutStats.isFairMQ = 0; // disable FMQ stats

// reset monitoring queue
gReadoutMonitoringQueue.clear();

// load configuration file
theLog.log(LogInfoSupport, "Reading configuration from %s %s", cfgFileURI, cfgFileEntryPoint);

Expand Down
2 changes: 1 addition & 1 deletion src/readRaw.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ int main(int argc, const char* argv[])
" dumpDataInline=(int) : if set, each packet raw content is printed (hex dump style).\n"
" dumpStats=(int) : if set, some statistics are printed on HBF/TF size.\n"
" fileReadVerbose=(int) : if set, more information is printed when reading/decoding file.\n"
" timeframePeriodOrbits=(int) : if set, TF id computed (and printed, when dump enabled) for each RDH. Typically, 128 or 256.\n"
" timeframePeriodOrbits=(int) : if set, TF id computed (and printed, when dump enabled) for each RDH. Typically, 32 or 128.\n"
" logOff=(int) : if set, logs disabled.\n"
" dumpOrbitStats=(int) : if set, first / min / max orbits are printed after file read.\n"
" \n",
Expand Down
3 changes: 2 additions & 1 deletion src/readoutConfigEditor.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ set configurationParametersDescriptor {
| equipment-* | saveErrorPagesMax | int | 0 | If set, pages found with data error are saved to disk up to given maximum. |
| equipment-* | saveErrorPagesPath | string | | Path where to save data pages with errors (when feature enabled). |
| equipment-* | stopOnError | int | 0 | If 1, readout will stop automatically on equipment error. |
| equipment-* | TFperiod | int | 128 | Duration of a timeframe, in number of LHC orbits. |
| equipment-* | TFperiod | int | 32 | Duration of a timeframe, in number of LHC orbits. |
| equipment-* | verbose | int | 0 | If set, extra debug messages may be logged. |
| equipment-cruemulator-* | cruBlockSize | int | 8192 | Size of a RDH block. |
| equipment-cruemulator-* | cruId | int | 0 | CRU Id, used for CRU Id field in RDH. |
Expand Down Expand Up @@ -140,6 +140,7 @@ set configurationParametersDescriptor {
| equipment-rorc-* | firmwareCheckEnabled | int | 1 | If set, RORC driver checks compatibility with detected firmware. Use 0 to bypass this check (eg new fw version not yet recognized by ReadoutCard version). |
| equipment-rorc-* | firmwareVersionsAllowed | string | | Comma-separated list of ROC firmware versions allowed (6-digit hash). If empty, all are allowed. |
| equipment-rorc-* | firmwareVersionsDenied | string | e4a5a46e | Comma-separated list of ROC firmware versions denied (6-digit hash), i.e. which would cause configuration to abort. |
| equipment-rorc-* | monitorFirstOrbitEnabled | int | 0 | If set, enable monitoring of RORC first orbit. |
| equipment-zmq-* | address | string | | Address of remote server to connect, eg tcp://remoteHost:12345. |
| equipment-zmq-* | mode | string | stream | Possible values: stream (1 input ZMQ message = 1 output data page), snapshot (last ZMQ message = one output data page per TF). |
| equipment-zmq-* | timeframeClientUrl | string | | The address to be used to retrieve current timeframe. When set, data is published only once for each TF id published by remote server. |
Expand Down
10 changes: 7 additions & 3 deletions src/testROC.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,11 @@ class ROCdevice

private:
std::string bankId = "testROC";
size_t bankSize = 2 * 1024 * 1024 * 1024L;
size_t bankSize = 1024 * 1024 * 1024L; // buffer size

size_t memoryPoolNumberOfPages = 1000;
size_t memoryPoolPageSize = 2 * 1024 * 1024;
size_t memoryPoolNumberOfPages = 10 * int((bankSize / memoryPoolPageSize) / 10); // number of pages fitting in memory, round down to ten
const char* memoryPoolType = "malloc";

std::string cardId = "0:0.0";
int cfgChannelNumber = 0;
Expand All @@ -80,7 +81,7 @@ ROCdevice::ROCdevice(std::string id)

bankId += id;

bank = getMemoryBank(bankSize, "MemoryMappedFile", bankId);
bank = getMemoryBank(bankSize, memoryPoolType, bankId);
theMemoryBankManager.addBank(bank);
mp = theMemoryBankManager.getPagedPool(memoryPoolPageSize, memoryPoolNumberOfPages, bankId); // pool of pages

Expand All @@ -91,6 +92,7 @@ ROCdevice::ROCdevice(std::string id)
params.setCardId(AliceO2::roc::Parameters::cardIdFromString(cardId));
params.setChannelNumber(cfgChannelNumber);
params.setDataSource(AliceO2::roc::DataSource::fromString(cfgDataSource));
params.setFirmwareCheckEnabled(0);

params.setBufferParameters(AliceO2::roc::buffer_parameters::Memory{ mp->getBaseBlockAddress(), mp->getBaseBlockSize() });

Expand All @@ -106,6 +108,8 @@ ROCdevice::ROCdevice(std::string id)
std::string infoFirmwareVersion = channel->getFirmwareInfo().value_or("unknown");
std::string infoCardId = channel->getCardId().value_or("unknown");
theLog.log(LogInfoDevel_(3010), "ROC PCI %s @ NUMA node %d, serial number %s, firmware version %s, card id %s", infoPciAddress.c_str(), infoNumaNode, infoSerialNumber.c_str(), infoFirmwareVersion.c_str(), infoCardId.c_str());
// test getCounterFirstOrbit()
// printf("get = %X\n", (int)(channel->getCounterFirstOrbit()));
}

ROCdevice::~ROCdevice() {}
Expand Down