Include OpenTelemetry traces in rageshakes

This commit is contained in:
Robin Townsend
2023-04-11 01:09:52 -04:00
parent 6999765f39
commit 95eca18207
4 changed files with 148 additions and 21 deletions

View File

@@ -15,9 +15,12 @@ limitations under the License.
*/
import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base";
import { ExportResult, ExportResultCode } from "@opentelemetry/core";
import {
ExportResult,
ExportResultCode,
hrTimeToMilliseconds,
} from "@opentelemetry/core";
import { logger } from "matrix-js-sdk/src/logger";
import { HrTime } from "@opentelemetry/api";
import { PosthogAnalytics } from "./PosthogAnalytics";
@@ -26,10 +29,6 @@ interface PrevCall {
hangupTs: number;
}
/**
 * Converts an OpenTelemetry high-resolution timestamp to milliseconds.
 *
 * @param time - A two-element tuple; the first element is scaled by 1000 and
 *   the second by 0.000001 before the two are summed.
 * @returns The combined value as a (possibly fractional) number.
 */
function hrTimeToMs(time: HrTime): number {
  const [whole, fraction] = time;
  const wholeMs = whole * 1000;
  const fractionMs = fraction * 0.000001;
  return wholeMs + fractionMs;
}
/**
* The maximum time between hanging up and joining the same call that we would
* consider a 'rejoin' on the user's part.
@@ -86,14 +85,14 @@ export class PosthogSpanExporter implements SpanExporter {
const prevCall = this.prevCall;
const newPrevCall = (this.prevCall = {
callId: span.attributes["matrix.confId"] as string,
hangupTs: hrTimeToMs(span.endTime),
hangupTs: hrTimeToMilliseconds(span.endTime),
});
// If the user joined the same call within a short time frame, log this as a
// rejoin. This is interesting as a call quality metric, since rejoins may
// indicate that users had to intervene to make the product work.
if (prevCall !== null && newPrevCall.callId === prevCall.callId) {
const duration = hrTimeToMs(span.startTime) - prevCall.hangupTs;
const duration = hrTimeToMilliseconds(span.startTime) - prevCall.hangupTs;
if (duration <= maxRejoinMs) {
PosthogAnalytics.instance.trackEvent(
{

View File

@@ -0,0 +1,103 @@
import { Attributes } from "@opentelemetry/api";
import {
  ExportResult,
  ExportResultCode,
  hrTimeToMicroseconds,
  hrTimeToMilliseconds,
} from "@opentelemetry/core";
import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base";
/**
 * Flattens an attribute map into a list of `{ key, type, value }` entries,
 * one per own enumerable property, in property order. `type` is the
 * JavaScript `typeof` of the value.
 */
const dumpAttributes = (attr: Attributes) =>
  Object.keys(attr).map((key) => {
    const value = attr[key];
    return { key, type: typeof value, value };
  });
/**
 * Exports spans on demand to the Jaeger JSON format, which can be attached to
 * rageshakes and loaded into analysis tools like Jaeger and Stalk.
 *
 * Spans handed to {@link RageshakeSpanExporter.export} are only buffered in
 * memory; nothing is serialized until {@link RageshakeSpanExporter.dump} is
 * called.
 */
export class RageshakeSpanExporter implements SpanExporter {
  /** Every span received so far, in the order it was exported. */
  private readonly spans: ReadableSpan[] = [];

  /**
   * Buffers the given spans and immediately reports success.
   *
   * @param spans - The finished spans to retain.
   * @param resultCallback - Invoked synchronously with a SUCCESS result.
   */
  export(
    spans: ReadableSpan[],
    resultCallback: (result: ExportResult) => void
  ): void {
    this.spans.push(...spans);
    resultCallback({ code: ExportResultCode.SUCCESS });
  }

  /**
   * Dumps the spans collected so far as Jaeger-compatible JSON.
   *
   * @returns A JSON string with one entry in `data` per distinct trace ID.
   */
  public dump(): string {
    // Organize spans by their trace IDs
    const traces = new Map<string, ReadableSpan[]>();
    for (const span of this.spans) {
      const traceId = span.spanContext().traceId;
      const trace = traces.get(traceId);
      if (trace === undefined) {
        traces.set(traceId, [span]);
      } else {
        trace.push(span);
      }
    }

    // All spans are attributed to a single process. The map must contain
    // nothing but process IDs, so "warnings" belongs on the trace instead.
    const processId = "p1";
    const processes = {
      [processId]: {
        serviceName: "element-call",
        tags: [],
      },
    };

    return JSON.stringify({
      // Paging/diagnostic envelope fields that are present in Jaeger JSON
      // exports; they carry no information for an offline dump
      total: 0,
      limit: 0,
      offset: 0,
      errors: null,
      data: [...traces.entries()].map(([traceId, spans]) => ({
        traceID: traceId,
        warnings: null,
        processes,
        spans: spans.map((span) => {
          const ctx = span.spanContext();
          return {
            traceID: traceId,
            spanID: ctx.spanId,
            operationName: span.name,
            processID: processId,
            warnings: null,
            // Jaeger expresses all times and durations in microseconds
            startTime: hrTimeToMicroseconds(span.startTime),
            duration: hrTimeToMicroseconds(span.duration),
            references:
              span.parentSpanId === undefined
                ? []
                : [
                    {
                      refType: "CHILD_OF",
                      traceID: traceId,
                      spanID: span.parentSpanId,
                    },
                  ],
            tags: dumpAttributes(span.attributes),
            logs: span.events.map((event) => {
              const attributeFields = dumpAttributes(event.attributes ?? {});
              // Jaeger logs have no name of their own; by convention the
              // event name is carried in an "event" field, unless the event
              // already has an attribute with that key
              const hasEventField = attributeFields.some(
                (field) => field.key === "event"
              );
              return {
                timestamp: hrTimeToMicroseconds(event.time),
                fields: hasEventField
                  ? attributeFields
                  : [
                      ...attributeFields,
                      { key: "event", type: "string", value: event.name },
                    ],
              };
            }),
          };
        }),
      })),
    });
  }

  /** Nothing to release: the exporter holds no external resources. */
  async shutdown(): Promise<void> {}
}