Tests APIs for excessive data exposure by intercepting responses, extracting fields, and scanning for leaked PII, internal IDs, debug info via Python scripts and regex. Aligns with OWASP API3:2023 for REST/GraphQL endpoints.
How this skill is triggered — by the user, by Claude, or both
Slash command
/cybersecurity-skills-zh:exploiting-excessive-data-exposure-in-api
The summary Claude sees in its skill listing — used to decide when to auto-load this skill
- 测试前端显示数据子集但 API 响应包含额外字段的 API
不适用于:未获得书面授权的情况。数据暴露测试涉及捕获和分析潜在的敏感个人数据。
使用 Python 的 requests 和 json 库将文档化的 API 响应与实际响应进行比较:
import requests
import json
# Root URL of the API under test; every endpoint path is appended to it.
BASE_URL = "https://target-api.example.com/api/v1"

# Headers sent with every request. "<user_token>" is a placeholder.
headers = {
    "Authorization": "Bearer <user_token>",
    "Content-Type": "application/json",
}

# Resources to fetch and analyze, as (HTTP method, path, JSON body) tuples.
_GET_PATHS = (
    "/users/me",
    "/users/me/orders",
    "/products",
    "/users/me/settings",
    "/transactions",
)
endpoints_to_test = [("GET", endpoint_path, None) for endpoint_path in _GET_PATHS]
def extract_fields(obj, prefix=""):
    """Recursively collect the path of every field name found in ``obj``.

    Args:
        obj: A decoded JSON value (dict, list, or scalar).
        prefix: Path of the container that holds ``obj``; empty at the root.

    Returns:
        A list of unique field paths in first-seen order. Nested keys are
        joined with ".", and keys of objects inside a list are reported
        under ``<prefix>[]``.
    """
    fields = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            full_key = f"{prefix}.{key}" if prefix else key
            fields.append(full_key)
            fields.extend(extract_fields(value, full_key))
    elif isinstance(obj, list):
        # Walk every element rather than only the first: items of one array
        # can carry different keys, and an extra field may show up later.
        for item in obj:
            fields.extend(extract_fields(item, f"{prefix}[]"))
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(fields))


# Fetch each endpoint and list every field path present in its JSON body.
for method, path, body in endpoints_to_test:
    # A timeout (seconds) keeps an unresponsive server from hanging the run.
    resp = requests.request(
        method, f"{BASE_URL}{path}", headers=headers, json=body, timeout=10
    )
    if resp.status_code != 200:
        continue
    try:
        data = resp.json()
    except ValueError:
        # A 200 response whose body is not JSON has no fields to list.
        continue
    all_fields = extract_fields(data)
    print(f"\n{method} {path} - 返回了 {len(all_fields)} 个字段:")
    for field_path in sorted(all_fields):
        print(f" {field_path}")
扫描 API 响应中的敏感数据模式:
import re
# Regexes for sensitive values that may appear anywhere in a response body.
# "api_key" and "uuid" carry the inline (?i) flag because their spelling
# varies in case (apiKey, API_KEY, upper-case UUIDs); the other patterns stay
# case-sensitive.
SENSITIVE_PATTERNS = {
    "email": r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
    "phone": r'(\+?1?\s?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4})',
    "ssn": r'\b\d{3}-\d{2}-\d{4}\b',
    "credit_card": r'\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})\b',
    "password_hash": r'\$2[aby]?\$\d{2}\$[./A-Za-z0-9]{53}',
    "api_key": r'(?i)(?:api[_-]?key|apikey)["\s:=]+["\']?([a-zA-Z0-9_\-]{20,})',
    "internal_ip": r'\b(?:10\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])|192\.168)\.\d{1,3}\.\d{1,3}\b',
    "aws_key": r'AKIA[0-9A-Z]{16}',
    "jwt_token": r'eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+',
    "uuid": r'(?i)[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
}

# Field names (snake_case) whose presence in a response is itself a finding.
SENSITIVE_FIELD_NAMES = [
    "password", "password_hash", "secret", "token", "ssn", "social_security",
    "credit_card", "card_number", "cvv", "pin", "private_key", "api_key",
    "internal_id", "debug", "trace", "stack_trace", "created_by_ip",
    "last_login_ip", "salt", "session_id", "refresh_token", "mfa_secret",
    "date_of_birth", "bank_account", "routing_number", "tax_id"
]

# A quoted identifier such as "passwordHash" or 'api-key'.
_QUOTED_NAME_RE = re.compile(r"""(["'])([A-Za-z0-9_-]+)\1""")


def _normalize_field_name(name):
    """Fold snake_case, kebab-case and camelCase spellings into one form."""
    return name.replace("_", "").replace("-", "").lower()


def scan_response(endpoint, response_text):
    """Scan a raw response body for sensitive values and field names.

    Args:
        endpoint: Label copied into every finding (e.g. "GET /users/me").
        response_text: The response body as text.

    Returns:
        A list of finding dicts. Value findings come first, in
        SENSITIVE_PATTERNS order, with keys "endpoint", "type"
        ("sensitive_value"), "pattern", "count" and "sample" (the first
        match, truncated to 20 characters plus "..."). Field findings
        follow, in SENSITIVE_FIELD_NAMES order, with keys "endpoint",
        "type" ("sensitive_field") and "field_name".
    """
    findings = []

    # Sensitive data patterns inside values.
    for pattern_name, pattern in SENSITIVE_PATTERNS.items():
        matches = re.findall(pattern, response_text)
        if not matches:
            continue
        first_match = matches[0]
        if len(first_match) > 20:
            sample = first_match[:20] + "..."
        else:
            sample = first_match
        findings.append({
            "endpoint": endpoint,
            "type": "sensitive_value",
            "pattern": pattern_name,
            "count": len(matches),
            "sample": sample,
        })

    # Sensitive field names. The literal quoted check is kept as-is; the
    # normalized lookup additionally catches other spellings of the same
    # name, such as "passwordHash" for "password_hash".
    response_lower = response_text.lower()
    quoted_names = {
        _normalize_field_name(match.group(2))
        for match in _QUOTED_NAME_RE.finditer(response_text)
    }
    for field in SENSITIVE_FIELD_NAMES:
        literal_hit = f'"{field}"' in response_lower or f"'{field}'" in response_lower
        if literal_hit or _normalize_field_name(field) in quoted_names:
            findings.append({
                "endpoint": endpoint,
                "type": "sensitive_field",
                "field_name": field,
            })
    return findings
# Scan the response body of every endpoint and print one line per finding.
for method, path, body in endpoints_to_test:
    # A timeout (seconds) keeps an unresponsive server from hanging the run.
    resp = requests.request(
        method, f"{BASE_URL}{path}", headers=headers, json=body, timeout=10
    )
    if resp.status_code != 200:
        continue
    for finding in scan_response(f"{method} {path}", resp.text):
        # Value findings carry "pattern"; field findings carry "field_name".
        detail = finding.get('pattern', finding.get('field_name'))
        print(f"[发现] {finding['endpoint']}: {finding['type']} - {detail}")
# Fields the UI displays for each endpoint (as observed in the front-end
# application), keyed by endpoint path.
_UI_FIELDS_BY_PATH = (
    ("/users/me", ("name", "email", "avatar_url", "role")),
    ("/users/me/orders", ("order_id", "date", "status", "total")),
    ("/products", ("id", "name", "price", "image_url", "description")),
)
ui_displayed_fields = {
    endpoint_path: set(shown) for endpoint_path, shown in _UI_FIELDS_BY_PATH
}
def _record_fields(data):
    """Return the field names of one representative record in ``data``.

    A top-level list contributes the keys of its first element. A dict that
    wraps a non-empty list of objects (a paginated response) contributes the
    keys of that list's first element; any other dict contributes its own
    keys. Returns None when ``data`` is neither a list nor a dict.
    """
    if isinstance(data, list):
        first = data[0] if data else None
        # A list of scalars has no field names to compare.
        return set(first) if isinstance(first, dict) else set()
    if isinstance(data, dict):
        for value in data.values():
            # Only a list of objects counts as a page of records; a list of
            # scalars (e.g. "roles": ["admin"]) is an ordinary property.
            if isinstance(value, list) and value and isinstance(value[0], dict):
                return set(value[0])
        return set(data)
    return None


# Fields the API actually returns, compared with what the UI displays.
for method, path, body in endpoints_to_test:
    # A timeout (seconds) keeps an unresponsive server from hanging the run.
    resp = requests.request(
        method, f"{BASE_URL}{path}", headers=headers, json=body, timeout=10
    )
    if resp.status_code != 200:
        continue
    try:
        actual_fields = _record_fields(resp.json())
    except ValueError:
        # A 200 response whose body is not JSON cannot be compared.
        continue
    if actual_fields is None:
        continue
    # Paths with no recorded UI baseline compare against the empty set.
    expected = ui_displayed_fields.get(path, set())
    excess = actual_fields - expected
    if excess:
        print(f"\n{method} {path} - 多余字段(UI 未显示):")
        for field in sorted(excess):
            print(f" - {field}")
# Many APIs embed complete user objects in responses for orders, comments, etc.
endpoints_with_user_objects = [
    "/orders",        # each order may include a full seller/buyer profile
    "/comments",      # comments may include the full author profile
    "/reviews",       # reviews may expose reviewer details
    "/transactions",  # transactions may include counterparty information
    "/team/members",  # team listings may expose excessive member data
]

# User attributes looked for in the raw body of each response.
_NESTED_USER_FIELDS = (
    "password_hash", "last_login_ip", "mfa_enabled", "phone_number",
    "date_of_birth", "ssn", "internal_notes", "salary", "address",
)

for path in endpoints_with_user_objects:
    # A timeout (seconds) keeps an unresponsive server from hanging the run.
    resp = requests.get(f"{BASE_URL}{path}", headers=headers, timeout=10)
    if resp.status_code != 200:
        continue
    text = resp.text
    # Look for user data leaking through nested objects: the quoted field
    # name is searched for anywhere in the body.
    user_fields_found = [
        field for field in _NESTED_USER_FIELDS if f'"{field}"' in text
    ]
    if user_fields_found:
        print(f"[过度暴露] {path} 暴露了用户字段:{user_fields_found}")
# GraphQL lets the client request any available field.
GRAPHQL_URL = f"{BASE_URL}/graphql"

# Introspection query that discovers every field on the User type.
introspection = {
    "query": """
{
__type(name: "User") {
fields {
name
type {
name
kind
}
}
}
}
"""
}

# A timeout (seconds) keeps an unresponsive server from hanging the run.
resp = requests.post(GRAPHQL_URL, headers=headers, json=introspection, timeout=10)
if resp.status_code == 200:
    try:
        payload = resp.json()
    except ValueError:
        payload = {}
    if not isinstance(payload, dict):
        payload = {}
    # "data" and "__type" can be JSON null, so a .get() default alone is not
    # enough; fall back explicitly.
    type_info = (payload.get("data") or {}).get("__type") or {}
    fields = type_info.get("fields") or []

    # SENSITIVE_FIELD_NAMES is snake_case; the GraphQL queries in this guide
    # use camelCase (passwordHash, lastLoginIp), so compare with underscores
    # and case folded away.
    normalized_sensitive = {
        name.replace("_", "").lower() for name in SENSITIVE_FIELD_NAMES
    }

    print("通过 GraphQL 可用的 User 字段:")
    for f in fields:
        is_sensitive = f["name"].replace("_", "").lower() in normalized_sensitive
        sensitivity = "敏感" if is_sensitive else "普通"
        # Wrapper types (NON_NULL, LIST) have a null name; show their kind.
        type_label = f["type"]["name"] or f["type"]["kind"]
        print(f" {f['name']} ({type_label}) [{sensitivity}]")
# Try to query sensitive fields directly.
sensitive_query = {
    "query": """
query {
users {
id
email
passwordHash
socialSecurityNumber
internalNotes
lastLoginIp
mfaSecret
apiKey
}
}
"""
}

# A timeout (seconds) keeps an unresponsive server from hanging the run.
resp = requests.post(GRAPHQL_URL, headers=headers, json=sensitive_query, timeout=10)
if resp.status_code == 200:
    try:
        result = resp.json()
    except ValueError:
        # A non-JSON body tells us nothing about field exposure.
        result = None
    # The absence of an "errors" entry means the server accepted every
    # requested field.
    if result is not None and "errors" not in result:
        print("[严重] GraphQL 无限制地暴露了敏感用户字段")
# Check the response for debug information in headers.
debug_headers_to_check = [
    "X-Debug-Token", "X-Debug-Info", "Server", "X-Powered-By",
    "X-Request-Id", "X-Correlation-Id", "X-Backend-Server",
    "X-Runtime", "X-Version", "X-Build-Version"
]

# A timeout (seconds) keeps an unresponsive server from hanging the run.
resp = requests.get(f"{BASE_URL}/users/me", headers=headers, timeout=10)
for header_name in debug_headers_to_check:
    # resp.headers is case-insensitive, so a direct membership test is
    # enough; no lower-cased copy has to be built per header.
    if header_name in resp.headers:
        print(f"[信息泄露] 响应头 {header_name}: {resp.headers.get(header_name)}")
# Check whether error responses contain stack traces.
error_payloads = [
    ("GET", "/users/invalid-id-format", None),
    ("POST", "/orders", {"invalid": "payload"}),
    ("GET", "/users/-1", None),
    ("GET", "/users/0", None),
]

# Lower-case substrings searched for in the lower-cased error body. Several
# are broad ("line ", "query", "sql"), so a hit needs manual confirmation.
_DEBUG_KEYWORDS = (
    "stack trace", "traceback", "at com.", "at org.",
    "file \"", "line ", "exception", "sql", "query",
)

for method, path, body in error_payloads:
    # A timeout (seconds) keeps an unresponsive server from hanging the run.
    resp = requests.request(
        method, f"{BASE_URL}{path}", headers=headers, json=body, timeout=10
    )
    if resp.status_code < 400:
        continue
    text = resp.text.lower()
    if any(keyword in text for keyword in _DEBUG_KEYWORDS):
        print(f"[调试泄露] {method} {path} -> {resp.status_code}: 包含堆栈跟踪或查询信息")
| 术语 | 定义 |
|---|---|
| 过度数据暴露(Excessive Data Exposure) | API 返回的数据字段超出客户端需要的量,依赖前端过滤向用户隐藏敏感信息 |
| 过度获取(Over-Fetching) | 请求或接收超出特定操作所需的数据量,在返回固定响应 Schema 的 REST API 中很常见 |
| 响应过滤(Response Filtering) | 客户端过滤 API 响应数据以只显示相关字段,由于完整响应可被拦截,这不提供任何安全保障 |
| 对象属性级授权(Object Property Level Authorization) | OWASP API3:2023 - 确保用户只能读取/写入他们被授权访问的对象属性 |
| PII 泄露(PII Leakage) | API 响应中意外暴露个人身份信息,包括姓名、邮件地址、地址、社会安全号码或财务数据 |
| Schema 验证(Schema Validation) | 强制 API 响应符合定义的 Schema,在传输前去除未授权字段 |
场景背景:某移动银行应用程序的 API 向移动客户端返回完整的账户对象,而应用只显示账户昵称和余额。该 API 同时被 iOS 和 Android 应用以及 Web 门户访问。
方法:
1. 拦截 GET /api/v1/accounts 响应:UI 显示 4 个字段,但 API 返回 23 个字段
2. 多余字段包括 routing_number、account_holder_ssn_last4、internal_risk_score、kyc_verification_status 和 linked_external_accounts - UI 均未显示
3. 检查 GET /api/v1/transactions 响应:API 返回客户端不需要的 merchant_id、terminal_id、authorization_code、processor_response 字段
4. 检查 GET /api/v1/users/me:API 返回 last_login_ip、mfa_backup_codes_remaining、account_officer_name 和 credit_score_band
5. 向 POST /api/v1/transfers 发送无效载荷,错误消息中返回 SQL 表名

常见陷阱:
## 发现:账户和交易 API 中存在过度数据暴露
**ID**: API-DATA-001
**严重性**: 高(CVSS 7.1)
**OWASP API**: API3:2023 - 对象属性级授权破坏
**受影响端点**:
- GET /api/v1/accounts
- GET /api/v1/transactions
- GET /api/v1/users/me
**描述**:
API 向客户端返回完整的数据库对象,包含移动应用 UI 未显示的
敏感字段。移动应用在客户端过滤这些字段,但通过拦截 API 响应
可以完全访问它们。这暴露了任何已认证用户的 SSN 片段、内部
风险评分和 KYC 验证数据。
**发现的多余字段**:
- /accounts: routing_number, account_holder_ssn_last4, internal_risk_score,
kyc_verification_status, linked_external_accounts(共 18 个多余字段)
- /transactions: merchant_id, terminal_id, authorization_code,
processor_response(共 12 个多余字段)
- /users/me: last_login_ip, mfa_backup_codes_remaining, credit_score_band
**影响**:
已认证用户可以提取敏感财务数据、内部风险评估以及应用程序
本不打算暴露的 PII。结合 BOLA 漏洞,可以为所有用户提取这些数据。
**修复建议**:
1. 在服务器端使用 DTO/视图模型实施响应过滤,只包含客户端所需的字段
2. 对每个端点每个角色使用 GraphQL 字段级授权或 REST 响应 Schema
3. 在序列化层从 API 响应中移除敏感字段
4. 在 API 网关中实施响应 Schema 验证,去除未记录的字段
5. 添加自动化测试验证响应 Schema 与文档一致
npx claudepluginhub killvxk/cybersecurity-skills-zh
Tests APIs for excessive data exposure by intercepting responses and analyzing for leaked PII, internal IDs, debug info, or sensitive data filtered by frontend. Maps to OWASP API3:2023.
Tests APIs for excessive data exposure by intercepting responses and identifying leaked PII, internal IDs, or debug info not shown in the UI. Maps to OWASP API3:2023.
Tests APIs for excessive data exposure by intercepting responses and identifying leaked PII, internal IDs, or debug info not shown in the UI. Maps to OWASP API3:2023.