From coreweave-pack
Provides Python helpers and Kubernetes patterns for CoreWeave GPU workloads, including affinity configs, resource limits, and inference clients for programmatic management.
How this skill is triggered — by the user, by Claude, or both
Slash command
/coreweave-pack:coreweave-sdk-patternsThis skill is limited to the following tools:
The summary Claude sees in its skill listing — used to decide when to auto-load this skill
CoreWeave is Kubernetes-native -- use kubectl, Kubernetes Python client, or Helm for programmatic management. These patterns cover GPU-aware deployment templates, inference client wrappers, and node affinity configurations.
CoreWeave is Kubernetes-native -- use kubectl, Kubernetes Python client, or Helm for programmatic management. These patterns cover GPU-aware deployment templates, inference client wrappers, and node affinity configurations.
# coreweave_helpers.py
from dataclasses import dataclass
@dataclass
class GPUConfig:
gpu_class: str # A100_PCIE_80GB, H100_SXM5, L40, etc.
gpu_count: int = 1
memory_gb: int = 32
cpu_cores: int = 4
GPU_CATALOG = {
"a100-80gb": GPUConfig("A100_PCIE_80GB", memory_gb=48, cpu_cores=8),
"h100-80gb": GPUConfig("H100_SXM5", memory_gb=64, cpu_cores=12),
"l40": GPUConfig("L40", memory_gb=24, cpu_cores=4),
"a100-8x": GPUConfig("A100_NVLINK_A100_SXM4_80GB", gpu_count=8, memory_gb=256, cpu_cores=64),
}
def gpu_affinity_block(gpu_class: str) -> dict:
return {
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [{
"matchExpressions": [{
"key": "gpu.nvidia.com/class",
"operator": "In",
"values": [gpu_class],
}]
}]
}
}
}
def gpu_resources(config: GPUConfig) -> dict:
return {
"limits": {
"nvidia.com/gpu": str(config.gpu_count),
"memory": f"{config.memory_gb}Gi",
"cpu": str(config.cpu_cores),
},
"requests": {
"nvidia.com/gpu": str(config.gpu_count),
"memory": f"{config.memory_gb // 2}Gi",
"cpu": str(config.cpu_cores // 2),
},
}
# inference_client.py
import requests
from typing import Optional
class CoreWeaveInferenceClient:
def __init__(self, endpoint: str, timeout: int = 30):
self.endpoint = endpoint.rstrip("/")
self.timeout = timeout
self.session = requests.Session()
def generate(self, prompt: str, max_tokens: int = 256, **kwargs) -> str:
resp = self.session.post(
f"{self.endpoint}/v1/completions",
json={"prompt": prompt, "max_tokens": max_tokens, **kwargs},
timeout=self.timeout,
)
resp.raise_for_status()
return resp.json()["choices"][0]["text"]
def chat(self, messages: list[dict], **kwargs) -> str:
resp = self.session.post(
f"{self.endpoint}/v1/chat/completions",
json={"messages": messages, **kwargs},
timeout=self.timeout,
)
resp.raise_for_status()
return resp.json()["choices"][0]["message"]["content"]
def health(self) -> bool:
try:
resp = self.session.get(f"{self.endpoint}/health", timeout=5)
return resp.status_code == 200
except Exception:
return False
import yaml
def generate_inference_deployment(
name: str,
image: str,
gpu_type: str = "a100-80gb",
replicas: int = 1,
port: int = 8000,
) -> str:
config = GPU_CATALOG[gpu_type]
return yaml.dump({
"apiVersion": "apps/v1",
"kind": "Deployment",
"metadata": {"name": name},
"spec": {
"replicas": replicas,
"selector": {"matchLabels": {"app": name}},
"template": {
"metadata": {"labels": {"app": name}},
"spec": {
"containers": [{
"name": name,
"image": image,
"ports": [{"containerPort": port}],
"resources": gpu_resources(config),
}],
"affinity": gpu_affinity_block(config.gpu_class),
},
},
},
})
| Error | Cause | Solution |
|---|---|---|
| GPU class not found | Typo in node label | Use exact values from gpu.nvidia.com/class |
| OOM on inference | Model too large for GPU | Use larger GPU or quantized model |
| Connection refused | Service not ready | Check pod readiness probe |
Apply patterns in coreweave-core-workflow-a for KServe inference deployments.
npx claudepluginhub jeremylongshore/claude-code-plugins-plus-skills --plugin coreweave-packDeploys GPU workloads on CoreWeave Kubernetes with kubectl: vLLM inference server or batch job. For first GPU deploys, inference testing, cluster access checks.
Creates, edits, and optimizes skills for Claude Code, including drafting, evaluating with test prompts, iterating on performance, and improving skill descriptions for better triggering accuracy.