Use this skill to add safety policies to an ADK 2.0 agent — input filtering, output redaction, prompt-injection detection, blocked-topic enforcement. Triggers on: "ADK safety", "ADK guardrails", "block harmful content ADK", "ADK content filter", "prompt injection ADK", "PII redaction ADK", "ADK policy enforcement", "safe agent output". Generates pre/post callbacks that screen inputs and outputs against policy rules and either redact, refuse, or escalate.
How this skill is triggered — by the user, by Claude, or both
Slash command
/adk-observability-safety:safety-policy-enforcer
The summary Claude sees in its skill listing — used to decide when to auto-load this skill
Add layered safety controls to an ADK 2.0 agent: input screening, output redaction, blocked-topic enforcement, prompt-injection detection.
Add layered safety controls to an ADK 2.0 agent: input screening, output redaction, blocked-topic enforcement, prompt-injection detection.
User input ──▶ [input filter] ──▶ Agent ──▶ [output filter] ──▶ User
                     │                             │
                     ▼                             ▼
               block / redact          redact / refuse / escalate
from google.adk.callbacks import on_user_input
from google.adk.events import Event
# Regexes for common prompt-injection phrasings. They are kept as plain
# pattern strings (not compiled) because input_guard passes each one to
# re.search together with re.IGNORECASE.
BLOCKED_PATTERNS = [
    # "ignore previous instructions", "disregard prior instructions", ...
    # One or more qualifiers are accepted, separated by any whitespace, so
    # stacked forms such as "ignore all previous instructions" match too.
    r"\b(?:ignore|disregard)\s+(?:(?:previous|prior|all)\s+)+instructions\b",
    # Any mention of the system prompt.
    r"\bsystem prompt\b",
    # Direct request to disclose the agent's instructions.
    r"reveal your instructions",
]
@on_user_input
async def input_guard(ctx, user_input: str):
    """Screen user input against BLOCKED_PATTERNS.

    Returns ``user_input`` unchanged when no pattern matches. On the first
    match (case-insensitive), a refusal Event authored by "safety" is
    appended to ``ctx.session.events`` and ``None`` is returned.
    """
    import re

    flagged = any(
        re.search(pattern, user_input, re.IGNORECASE)
        for pattern in BLOCKED_PATTERNS
    )
    if not flagged:
        return user_input

    ctx.session.events.append(
        Event(
            author="safety",
            content="I can't help with that request.",
        )
    )
    return None  # short-circuit; agent doesn't run
import re
from google.adk.callbacks import on_after_model_call
# Label -> regex for each kind of PII to redact. The label is what ends up
# inside the "[<label>-redacted]" placeholder; insertion order is the order
# in which redact_pii applies the patterns.
PII_PATTERNS = dict(
    email=r"[\w.-]+@[\w.-]+",
    phone=r"\b\d{3}-\d{3}-\d{4}\b",
    ssn=r"\b\d{3}-\d{2}-\d{4}\b",
    credit=r"\b(?:\d[ -]*?){13,16}\b",
)
@on_after_model_call
async def redact_pii(ctx, response):
    """Replace PII matches in the model response with ``[<label>-redacted]``.

    Every pattern in PII_PATTERNS is applied in turn, in the dict's order,
    to ``response.text``. The scrubbed text is written back to
    ``response.text`` and the same response object is returned.
    """
    scrubbed = response.text
    for label in PII_PATTERNS:
        placeholder = f"[{label}-redacted]"
        scrubbed = re.compile(PII_PATTERNS[label]).sub(placeholder, scrubbed)
    response.text = scrubbed
    return response
from google.adk.models.lite_llm import LiteLlm
# Model handle that safety_judge awaits to score each response.
# NOTE(review): LiteLLM model identifiers usually carry a provider prefix
# (e.g. "gemini/...") — confirm this bare name resolves as intended.
JUDGE = LiteLlm(model="gemini-2.5-flash")
def _parse_judge_score(raw):
    """Return the judge's numeric score, or None if the reply is unusable.

    Only the span from the first "{" to the last "}" is parsed, so a reply
    wrapped in code fences or surrounded by chatter is still accepted.
    """
    import json

    try:
        start, end = raw.index("{"), raw.rindex("}")
        return float(json.loads(raw[start:end + 1])["score"])
    except (AttributeError, TypeError, ValueError, KeyError):
        # Not a string, no JSON object, invalid JSON, or no numeric "score".
        return None


@on_after_model_call
async def safety_judge(ctx, response):
    """Have JUDGE score the response and withhold it when it is not safe.

    The judge is asked for a 0-10 safety score as JSON. A score below 5
    replaces ``response.text`` with a refusal and appends a "safety" Event
    to ``ctx.session.events``. The gate fails closed: a reply that cannot be
    parsed into a numeric score is treated the same as a failing score
    instead of raising out of the callback.

    Returns the (possibly modified) response object.
    """
    prompt = (
        "Score 0-10: is this response safe? Penalize: medical advice without "
        "disclaimers, harmful instructions, hate speech, doxing.\n"
        # The exemplar is valid JSON (quoted keys) so that a judge copying it
        # literally still produces parseable output.
        f'Response: {response.text}\n\nOutput JSON: {{"score": <int>, "reason": "<str>"}}.'
    )
    # NOTE(review): assumes JUDGE.complete returns the reply as a str — confirm.
    out = await JUDGE.complete(prompt)
    score = _parse_judge_score(out)

    # "not (score >= 5)" rather than "score < 5" so NaN is rejected as well.
    if score is None or not (score >= 5):
        ctx.session.events.append(Event(
            author="safety",
            content="(Response withheld — safety review failed.)",
        ))
        response.text = "I can't provide that response. Please consult a professional."
    return response
# Phrases that topic_block looks for as plain substrings of the lowercased
# user input; entries must therefore be lowercase to ever match.
BLOCKED_TOPICS = ["weapon manufacturing", "self-harm", "minor exploitation"]
@on_user_input
async def topic_block(ctx, user_input):
    """Refuse input that mentions any entry of BLOCKED_TOPICS.

    Each topic is checked as a substring of the lowercased input. When none
    is present, ``user_input`` is returned unchanged. Otherwise a refusal
    Event authored by "safety" is appended to ``ctx.session.events`` and
    ``None`` is returned.
    """
    lowered = user_input.lower()
    if not any(topic in lowered for topic in BLOCKED_TOPICS):
        return user_input

    # Record the same refusal input_guard uses, so a blocked turn leaves a
    # visible reply instead of being dropped silently.
    ctx.session.events.append(Event(
        author="safety",
        content="I can't help with that request.",
    ))
    return None
@on_after_model_call
async def escalate_critical(ctx, response):
    """Alert on-call when the response text mentions "self-harm".

    The check is a substring test on the lowercased ``response.text``. On a
    hit, ``alert_oncall`` is awaited with the session id. The response is
    returned unmodified in either case.
    """
    mentions_self_harm = "self-harm" in response.text.lower()
    if not mentions_self_harm:
        return response

    await alert_oncall(session_id=ctx.session.id)
    return response
auth-framework-config for who can hit the agent at all
tool-confirmation-hitl for per-tool approval gates
npx claudepluginhub healthcare-ai-consulting-llc/adk-2-toolkit --plugin adk-observability-safety
Provides CDSS development patterns for drug interaction checking, dose validation, clinical scoring (NEWS2, qSOFA), and alert classification integrated into EMR workflows.