Use this skill to set up a tool-call monitoring dashboard for ADK 2.0 agents — track tool latency, error rate, usage frequency, and per-tool cost. Triggers on: "ADK tool monitoring", "ADK dashboard", "monitor ADK tool calls", "tool latency ADK", "ADK metrics", "Grafana ADK", "Cloud Monitoring ADK", "ADK tool stats". Generates metric exporters and a reference dashboard config.
How this skill is triggered — by the user, by Claude, or both
Slash command
/adk-observability-safety:tool-monitor-dashboard
The summary Claude sees in its skill listing — used to decide when to auto-load this skill
Track per-tool health metrics for an ADK 2.0 agent: latency, error rate, frequency, cost.
Track per-tool health metrics for an ADK 2.0 agent: latency, error rate, frequency, cost.
import time
from google.cloud import monitoring_v3
from google.adk.callbacks import on_before_tool_call, on_after_tool_call
# Google Cloud project ID used to build the "projects/<id>" resource name in
# write_metric. NOTE(review): "my-project" looks like a placeholder — replace
# it with the real project ID before use.
PROJECT = "my-project"
# Module-level Cloud Monitoring client, created once at import time and reused
# by every write_metric call.
client = monitoring_v3.MetricServiceClient()
def write_metric(metric_type: str, value: float, labels: dict):
    """Write a single data point to a Cloud Monitoring custom metric.

    Args:
        metric_type: Suffix appended to ``custom.googleapis.com/adk/`` to form
            the full metric type, e.g. ``"tool/latency_ms"``.
        value: Sample value, sent as a double.
        labels: Metric labels merged into the time series' label map.

    The point is stamped with the current wall-clock time truncated to whole
    seconds and attached to the ``global`` monitored resource. The request is
    issued directly (not awaited) on the module-level ``client``.
    """
    # Build the sample first: end time is "now" in whole seconds.
    now_seconds = int(time.time())
    sample = monitoring_v3.Point(
        {
            "interval": {"end_time": {"seconds": now_seconds}},
            "value": {"double_value": value},
        }
    )

    # Describe which metric / resource the sample belongs to.
    ts = monitoring_v3.TimeSeries()
    ts.metric.type = f"custom.googleapis.com/adk/{metric_type}"
    ts.metric.labels.update(labels)
    ts.resource.type = "global"
    ts.points = [sample]

    project_path = f"projects/{PROJECT}"
    client.create_time_series(name=project_path, time_series=[ts])
@on_before_tool_call
async def tool_start(ctx, tool_name, args):
    """Remember when a tool call started so its latency can be computed later.

    Stores the current wall-clock time in ``ctx.runtime`` under the key
    ``_tool_start_<tool_name>``. ``args`` is accepted but not used.
    """
    # NOTE(review): the key depends on the tool name only, so two overlapping
    # calls to the same tool sharing one ctx.runtime would overwrite each
    # other's start time — confirm how ctx.runtime is scoped.
    start_key = f"_tool_start_{tool_name}"
    ctx.runtime[start_key] = time.time()
@on_after_tool_call
async def tool_metrics(ctx, tool_name, args, result):
    """Export latency, error and call-count metrics for a finished tool call.

    Pops the start timestamp stored under ``_tool_start_<tool_name>`` in
    ``ctx.runtime`` and, when one is present, writes three points through
    ``write_metric``: ``tool/latency_ms``, ``tool/error`` (1.0 or 0.0) and
    ``tool/calls`` (1.0), each labelled with the tool name. When no start
    timestamp is found nothing is written at all.

    Args:
        ctx: Callback context; only its ``runtime`` mapping is used here.
        tool_name: Tool name, used for the lookup key and the ``tool`` label.
        args: Tool arguments (unused).
        result: Tool result. ``None``, or a dict containing an ``"error"``
            key, is counted as a failure; anything else is a success.
    """
    started = ctx.runtime.pop(f"_tool_start_{tool_name}", None)
    if started is None:
        return

    latency_ms = (time.time() - started) * 1000

    # Only look for an "error" key in dict results. A bare `"error" in result`
    # does substring matching on string results and raises TypeError on
    # non-container results such as ints, which would break the callback.
    # NOTE(review): None counts as an error here but not in prom_metrics —
    # confirm which behaviour is intended for tools that return nothing.
    is_error = result is None or (isinstance(result, dict) and "error" in result)

    # NOTE(review): write_metric is a plain (non-async) function, so these
    # three calls run inline in this coroutine, one after another.
    write_metric("tool/latency_ms", latency_ms, {"tool": tool_name})
    write_metric("tool/error", 1.0 if is_error else 0.0, {"tool": tool_name})
    write_metric("tool/calls", 1.0, {"tool": tool_name})
from prometheus_client import Counter, Histogram, start_http_server
# Counter of tool invocations, labelled by tool name and outcome status.
tool_calls = Counter("adk_tool_calls_total", "Total tool invocations", ["tool", "status"])
# Histogram of tool latency, labelled by tool name; the metric name indicates
# the observed values are in seconds.
tool_latency = Histogram("adk_tool_latency_seconds", "Tool latency", ["tool"])
# Runs at import time: starts the Prometheus exporter HTTP server on port 9100.
start_http_server(9100) # serves the /metrics endpoint for scraping
@on_after_tool_call
async def prom_metrics(ctx, tool_name, args, result):
    """Record Prometheus latency and call-count metrics for a finished tool call.

    Pops the start timestamp stored under ``_tool_start_<tool_name>`` in
    ``ctx.runtime``; when present, the elapsed time in seconds is observed on
    the ``tool_latency`` histogram. The ``tool_calls`` counter is always
    incremented, with ``status`` set to ``"error"`` when the result is a dict
    containing an ``"error"`` key and ``"ok"`` otherwise.

    Args:
        ctx: Callback context; only its ``runtime`` mapping is used here.
        tool_name: Tool name, used for the lookup key and the ``tool`` label.
        args: Tool arguments (unused).
        result: Tool result inspected for an ``"error"`` key.
    """
    # NOTE(review): tool_metrics pops the same key, so if both callbacks are
    # registered whichever runs second will find no start time.
    started = ctx.runtime.pop(f"_tool_start_{tool_name}", None)
    # Explicit None check: "missing" is the only case that should skip latency.
    if started is not None:
        tool_latency.labels(tool=tool_name).observe(time.time() - started)

    # Only dict results are searched for an "error" key. A bare
    # `"error" in result` does substring matching on strings and raises
    # TypeError on non-container results such as ints.
    has_error = isinstance(result, dict) and "error" in result
    status = "error" if has_error else "ok"
    tool_calls.labels(tool=tool_name, status=status).inc()
{
"title": "ADK Agent Health",
"panels": [
{"title": "Tool Calls/min", "targets": [{"expr": "sum by(tool) (rate(adk_tool_calls_total[1m])) * 60"}]},
{"title": "Tool p95 Latency", "targets": [{"expr": "histogram_quantile(0.95, sum by (le, tool) (rate(adk_tool_latency_seconds_bucket[5m])))"}]},
{"title": "Tool Error Rate", "targets": [{"expr": "sum by(tool) (rate(adk_tool_calls_total{status=\"error\"}[5m])) / sum by(tool) (rate(adk_tool_calls_total[5m]))"}]}
]
}
| Metric | Target |
|---|---|
| Tool error rate | < 1% over 5min |
| Tool p95 latency | < 2s for sync tools |
| Agent end-to-end latency | < 10s p95 |
/metrics endpoint scrapes cleanly
logging-callback-setup for structured event logs
safety-policy-enforcer for incident-grade alerts
npx claudepluginhub healthcare-ai-consulting-llc/adk-2-toolkit --plugin adk-observability-safety
Provides CDSS development patterns for drug interaction checking, dose validation, clinical scoring (NEWS2, qSOFA), and alert classification integrated into EMR workflows.