Analyzes Outlook PST/OST files for email forensics, extracting content, headers, attachments, deleted emails, and metadata using libpff, pffexport, and pypff.
How this skill is triggered — by the user, by Claude, or both
Slash command
/cybersecurity-skills-zh:analyzing-outlook-pst-for-email-forensics
The summary Claude sees in its skill listing — used to decide when to auto-load this skill
Microsoft Outlook PST(个人存储表,Personal Storage Table)和 OST(离线存储表,Offline Storage Table)文件是数字取证调查中的关键证据来源。PST 文件以基于 MAPI(消息应用程序编程接口,Messaging Application Programming Interface)属性系统的专有二进制格式存储电子邮件、日历事件、联系人、任务和备注。对这些文件进行取证分析,可以恢复已删除的邮件(来自"可恢复邮件"文件夹)、提取邮件头以追踪邮件路由、分析附件中的恶意软件或外泄数据,以及重建通信模式。现代 PST 文件使用 Unicode 格式,页面大小 4KB,最大可达 50GB;而旧版 ANSI 格式限制为 2GB。
Microsoft Outlook PST(个人存储表,Personal Storage Table)和 OST(离线存储表,Offline Storage Table)文件是数字取证调查中的关键证据来源。PST 文件以基于 MAPI(消息应用程序编程接口,Messaging Application Programming Interface)属性系统的专有二进制格式存储电子邮件、日历事件、联系人、任务和备注。对这些文件进行取证分析,可以恢复已删除的邮件(来自"可恢复邮件"文件夹)、提取邮件头以追踪邮件路由、分析附件中的恶意软件或外泄数据,以及重建通信模式。现代 PST 文件使用 Unicode 格式,页面大小 4KB,最大可达 50GB;而旧版 ANSI 格式限制为 2GB。
| 来源 | 路径 |
|---|---|
| Outlook 2016+ 默认位置 | %USERPROFILE%\Documents\Outlook Files\*.pst |
| Outlook 旧版 | %LOCALAPPDATA%\Microsoft\Outlook\*.pst |
| OST 缓存 | %LOCALAPPDATA%\Microsoft\Outlook\*.ost |
| 归档文件 | %USERPROFILE%\Documents\Outlook Files\archive.pst |
# 从 PST 文件导出所有项目
pffexport -m all evidence.pst -t exported_pst
# 仅导出已分配的项目(不含孤立项和已恢复项)
pffexport -m items evidence.pst -t exported_emails
# 导出已恢复/已删除项目
pffexport -m recovered evidence.pst -t recovered_items
# 获取 PST 文件信息
pffinfo evidence.pst
import pypff
import os
import json
import hashlib
import email
import sys
from datetime import datetime
from collections import defaultdict
class PSTForensicAnalyzer:
    """Forensic analyzer for Outlook PST/OST files.

    Opens the file with ``pypff``, walks the folder tree to collect
    per-message metadata and attachment records, and can export the
    attachments and a JSON report into ``output_dir``.

    The analyzer can be used as a context manager so the underlying file
    handle is closed even when analysis fails.
    """

    def __init__(self, pst_path: str, output_dir: str):
        """Open ``pst_path`` and create ``output_dir`` if it is missing."""
        self.pst_path = pst_path
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)
        self.pst = pypff.file()
        self.pst.open(pst_path)
        self.messages = []
        self.attachments = []
        self.stats = defaultdict(int)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False

    def process_folder(self, folder, folder_path: str = ""):
        """Recursively walk ``folder`` and collect metadata for its messages.

        Args:
            folder: Folder object to process.
            folder_path: Slash-separated path of the parent folder; empty
                for the root.

        Failures are counted in ``self.stats`` (``parse_errors`` for
        messages, ``folder_errors`` for subfolders) instead of aborting
        the walk.
        """
        folder_name = folder.name or "Root"
        current_path = f"{folder_path}/{folder_name}" if folder_path else folder_name

        for i in range(folder.number_of_sub_messages):
            try:
                message = folder.get_sub_message(i)
                msg_data = self.extract_message(message, current_path)
            except Exception:
                self.stats["parse_errors"] += 1
                continue
            if msg_data:
                self.messages.append(msg_data)
                self.stats["total_messages"] += 1

        for i in range(folder.number_of_sub_folders):
            try:
                subfolder = folder.get_sub_folder(i)
                self.process_folder(subfolder, current_path)
            except Exception:
                # Keep going, but leave a trace that part of the tree was skipped.
                self.stats["folder_errors"] += 1

    def extract_message(self, message, folder_path: str) -> dict:
        """Extract forensic metadata from a single message.

        Args:
            message: Message object to inspect.
            folder_path: Path of the folder that contains the message.

        Returns:
            A dict of message metadata. When transport headers are present
            it also carries the parsed routing fields.

        Side effects:
            Appends one record per attachment to ``self.attachments`` and
            increments ``self.stats["total_attachments"]``.
        """
        from email.utils import parseaddr

        msg_data = {
            "folder": folder_path,
            "subject": message.subject or "",
            "sender": message.sender_name or "",
            "sender_email": "",
            "creation_time": str(message.creation_time) if message.creation_time else None,
            "delivery_time": str(message.delivery_time) if message.delivery_time else None,
            "modification_time": str(message.modification_time) if message.modification_time else None,
            "has_attachments": message.number_of_attachments > 0,
            "attachment_count": message.number_of_attachments,
            "body_size": len(message.plain_text_body or b""),
            "html_size": len(message.html_body or b""),
        }

        # Transport headers drive the routing analysis.
        headers = message.transport_headers
        if headers:
            # Accept bytes as well as str so such a message is not lost
            # to a TypeError in the parser.
            if isinstance(headers, bytes):
                headers = headers.decode("utf-8", errors="replace")
            msg_data["headers_present"] = True
            msg_data["headers_size"] = len(headers)
            parsed = email.message_from_string(headers)
            msg_data["from_header"] = parsed.get("From", "")
            msg_data["to_header"] = parsed.get("To", "")
            msg_data["date_header"] = parsed.get("Date", "")
            msg_data["message_id"] = parsed.get("Message-ID", "")
            msg_data["x_originating_ip"] = parsed.get("X-Originating-IP", "")
            msg_data["received_headers"] = parsed.get_all("Received", [])
            # Fill the sender address from the From header.
            msg_data["sender_email"] = parseaddr(str(msg_data["from_header"]))[1]

        for j in range(message.number_of_attachments):
            try:
                attachment = message.get_attachment(j)
                att_data = {
                    "message_subject": msg_data["subject"],
                    # Fall back when the attachment object has no ``name``
                    # attribute rather than dropping the record.
                    "name": getattr(attachment, "name", None) or f"attachment_{j}",
                    "size": attachment.size,
                    "content_type": "",
                }
            except Exception:
                continue
            self.attachments.append(att_data)
            self.stats["total_attachments"] += 1
        return msg_data

    def save_attachments(self, max_size_mb: int = 100):
        """Export attachments to ``<output_dir>/attachments``.

        Args:
            max_size_mb: Attachments of this size (in MiB) or larger are
                skipped.
        """
        att_dir = os.path.join(self.output_dir, "attachments")
        os.makedirs(att_dir, exist_ok=True)
        root = self.pst.get_root_folder()
        self._save_attachments_recursive(root, att_dir, max_size_mb)

    @staticmethod
    def _unique_path(path: str) -> str:
        """Return ``path``, or a ``_N``-suffixed variant if it already exists."""
        if not os.path.exists(path):
            return path
        stem, ext = os.path.splitext(path)
        counter = 1
        while os.path.exists(f"{stem}_{counter}{ext}"):
            counter += 1
        return f"{stem}_{counter}{ext}"

    def _save_attachments_recursive(self, folder, att_dir, max_size_mb):
        """Write the attachments of ``folder`` and its subfolders to ``att_dir``.

        Failures are counted in ``self.stats["attachment_save_errors"]``.
        """
        limit = max_size_mb * 1024 * 1024
        for i in range(folder.number_of_sub_messages):
            try:
                message = folder.get_sub_message(i)
                count = message.number_of_attachments
            except Exception:
                self.stats["attachment_save_errors"] += 1
                continue
            for j in range(count):
                # Isolate each attachment so one bad item does not skip
                # the remaining attachments of the same message.
                try:
                    att = message.get_attachment(j)
                    size = att.size
                    if not size or size >= limit:
                        continue
                    name = getattr(att, "name", None) or f"unknown_{i}_{j}"
                    safe_name = "".join(
                        c if c.isalnum() or c in ".-_" else "_" for c in name
                    )
                    # Never overwrite an earlier export with the same name.
                    path = self._unique_path(os.path.join(att_dir, safe_name))
                    data = att.read_buffer(size)
                    with open(path, "wb") as f:
                        f.write(data)
                except Exception:
                    self.stats["attachment_save_errors"] += 1
        for i in range(folder.number_of_sub_folders):
            try:
                self._save_attachments_recursive(
                    folder.get_sub_folder(i), att_dir, max_size_mb
                )
            except Exception:
                self.stats["attachment_save_errors"] += 1

    def generate_report(self) -> str:
        """Process the whole file and write a JSON forensic report.

        Returns:
            Path of the written ``pst_forensic_report.json``.

        The report lists at most 500 messages and 200 attachment records;
        the full counts are in its ``statistics`` section.
        """
        # Start from a clean slate so repeated calls do not duplicate entries.
        self.messages = []
        self.attachments = []
        for key in ("total_messages", "total_attachments", "parse_errors", "folder_errors"):
            self.stats[key] = 0

        root = self.pst.get_root_folder()
        self.process_folder(root)
        report = {
            # Local time with an explicit UTC offset, so it is unambiguous.
            "analysis_timestamp": datetime.now().astimezone().isoformat(),
            "pst_file": self.pst_path,
            "pst_size_bytes": os.path.getsize(self.pst_path),
            "statistics": dict(self.stats),
            "messages": self.messages[:500],
            "attachments": self.attachments[:200],
        }
        report_path = os.path.join(self.output_dir, "pst_forensic_report.json")
        with open(report_path, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2, default=str)
        print(f"[*] 邮件总数: {self.stats['total_messages']}")
        print(f"[*] 附件总数: {self.stats['total_attachments']}")
        print(f"[*] 解析错误: {self.stats['parse_errors']}")
        return report_path

    def close(self):
        """Close the underlying file handle."""
        self.pst.close()
def main():
    """Command-line entry point: analyze a PST file and write the report.

    Expects two arguments: the PST/OST file path and the output directory.
    Exits with status 1 when either is missing.
    """
    if len(sys.argv) < 3:
        print("用法: python process.py <pst_file> <output_dir>")
        sys.exit(1)
    analyzer = PSTForensicAnalyzer(sys.argv[1], sys.argv[2])
    try:
        analyzer.generate_report()
    finally:
        # Release the file handle even if report generation fails.
        analyzer.close()


if __name__ == "__main__":
    main()
取证调查的关键头字段:
| 头字段 | 取证价值 |
|---|---|
| Received | 邮件路由链(从下到上阅读) |
| X-Originating-IP | 发件人实际 IP 地址 |
| Message-ID | 用于关联的唯一标识符 |
| Date | 发送时间戳 |
| Return-Path | 退信地址(可能与 From 不同) |
| DKIM-Signature | 域名认证签名 |
| Authentication-Results | SPF、DKIM、DMARC 验证结果 |
| X-Mailer | 使用的邮件客户端 |
npx claudepluginhub killvxk/cybersecurity-skills-zh
Analyzes Microsoft Outlook PST and OST files for email forensic evidence including message content, headers, attachments, deleted items, and metadata using libpff and pypff for legal investigations and incident response.
Analyzes Microsoft Outlook PST/OST files for forensic evidence including message content, headers, attachments, deleted items, and metadata using libpff and Python. Useful for incident response and legal investigations.
Analyzes Outlook PST/OST files for email forensics, extracting messages, headers, attachments, deleted items, metadata using libpff, pypff, pst-utils. For incident response and investigations.