Appearance
Token优化策略
Token是AI API计费的核心单位,优化Token使用可以显著降低成本
概述
Token是大语言模型处理文本的基本单位,也是API计费的核心依据。本教程将教你如何优化Token使用,在保证效果的前提下降低成本。
什么是Token?
Token理解:
英文:1 Token ≈ 4个字符 ≈ 0.75个单词
中文:1 Token ≈ 1.5-2个汉字
示例:
"Hello World" → 2个Token
"你好世界" → 约3-4个Token
"PHP是一种服务器端脚本语言" → 约15个Token
计算工具:
- OpenAI Tokenizer: https://platform.openai.com/tokenizer
- Tiktoken: OpenAI官方分词库
Token消耗场景
Token消耗分析:
输入Token(Prompt Tokens)
├── 系统提示词
├── 用户消息
├── 历史对话
├── 文档内容
└── 示例数据
输出Token(Completion Tokens)
├── AI回复内容
├── 推理过程
└── 格式化输出
总Token = 输入Token + 输出Token
Token计算
PHP实现Token估算
php
<?php
class TokenEstimator
{
    // Rough chars-per-token ratios; real tokenizers (e.g. tiktoken) vary per model.
    private const CHARS_PER_TOKEN_EN = 4;
    private const CHARS_PER_TOKEN_CN = 2;
    // CJK characters in U+4E00-U+9FFF occupy exactly 3 bytes in UTF-8.
    private const BYTES_PER_CJK_CHAR = 3;

    /**
     * Estimate the token count of a text with a byte-based heuristic.
     *
     * CJK characters and the remaining (assumed mostly ASCII) bytes are
     * counted separately and divided by their chars-per-token ratios;
     * the trailing +1 compensates for integer rounding down.
     *
     * FIX: preg_match_all() can return false on a PCRE error (e.g. invalid
     * UTF-8 input); the original fed that false into arithmetic. The byte
     * remainder is also clamped so malformed input can never go negative.
     */
    public function estimateTokens(string $text): int
    {
        $matched = preg_match_all('/[\x{4e00}-\x{9fff}]/u', $text);
        $chineseChars = ($matched === false) ? 0 : $matched;
        $englishChars = max(0, strlen($text) - $chineseChars * self::BYTES_PER_CJK_CHAR);
        $chineseTokens = intdiv($chineseChars, self::CHARS_PER_TOKEN_CN);
        $englishTokens = intdiv($englishChars, self::CHARS_PER_TOKEN_EN);
        return $chineseTokens + $englishTokens + 1;
    }

    /**
     * Estimate total tokens for a chat message array.
     *
     * Each message carries a fixed overhead of 4 tokens plus its role and
     * content; the final +2 is a reply-priming allowance (rough heuristic
     * mirroring OpenAI's chat-format accounting).
     */
    public function estimateMessagesTokens(array $messages): int
    {
        $total = 0;
        foreach ($messages as $message) {
            $total += 4;
            $total += $this->estimateTokens($message['content'] ?? '');
            $total += $this->estimateTokens($message['role'] ?? '');
        }
        return $total + 2;
    }

    /**
     * Estimate USD cost for a request. Prices are USD per 1M tokens;
     * unknown models fall back to $1 / $2 per 1M as a safe over-estimate.
     */
    public function estimateCost(int $inputTokens, int $outputTokens, string $model): float
    {
        $pricing = [
            'gpt-4o-mini' => ['input' => 0.15, 'output' => 0.6],
            'gpt-4o' => ['input' => 2.5, 'output' => 10],
            'deepseek-chat' => ['input' => 0.001, 'output' => 0.002],
            'claude-3-5-sonnet' => ['input' => 3, 'output' => 15],
        ];
        $price = $pricing[$model] ?? ['input' => 1, 'output' => 2];
        $inputCost = ($inputTokens / 1000000) * $price['input'];
        $outputCost = ($outputTokens / 1000000) * $price['output'];
        return $inputCost + $outputCost;
    }
}
// Usage example: estimate tokens and cost for a sample text.
$estimator = new TokenEstimator();
$text = "PHP是一种广泛使用的开源服务器端脚本语言,特别适合Web开发。";
$tokens = $estimator->estimateTokens($text);
$cost = $estimator->estimateCost(1000, 500, 'deepseek-chat');
echo "预估Token数: {$tokens}\n";
echo "预估成本: \${$cost}\n";
Prompt优化
精简系统提示
php
<?php
class PromptOptimizer
{
    /**
     * Shrink a system prompt without losing meaning: strip emphatic filler
     * phrases, drop low-information words, then normalize whitespace.
     *
     * FIX: whitespace compression now runs LAST so the gaps left by the
     * removal steps are cleaned up as well.
     */
    public function optimizeSystemPrompt(string $prompt): string
    {
        $prompt = $this->removeRedundancy($prompt);
        $prompt = $this->removeUnnecessaryWords($prompt);
        return $this->compressWhitespace($prompt);
    }

    /**
     * Remove emphatic filler phrases that add tokens but no information.
     *
     * FIX: the original used patterns like '/请务必/g' — PHP's PCRE has no
     * 'g' modifier (preg_replace already replaces every match), so those
     * patterns emitted a warning and made preg_replace() return null.
     * Plain literal replacement is what was intended.
     */
    private function removeRedundancy(string $prompt): string
    {
        $fillers = ['请务必', '一定要', '千万要', '请注意', '请记住', '非常重要'];
        return str_replace($fillers, '', $prompt);
    }

    // Collapse runs of whitespace into single spaces and trim both ends.
    private function compressWhitespace(string $prompt): string
    {
        return preg_replace('/\s+/', ' ', trim($prompt));
    }

    // Drop common low-information words (demonstratives, hedges).
    private function removeUnnecessaryWords(string $prompt): string
    {
        $unnecessary = ['的话', '这个', '那个', '其实', '实际上'];
        return str_replace($unnecessary, '', $prompt);
    }
}
// Comparison: verbose prompt vs. hand-optimized prompt.
$verbosePrompt = "请务必记住,你是一个专业的PHP开发工程师,一定要用简洁的语言回答问题,千万要注意代码的正确性,这非常重要。";
$optimizedPrompt = "你是PHP开发工程师,简洁回答问题,确保代码正确。";
$estimator = new TokenEstimator();
$before = $estimator->estimateTokens($verbosePrompt);
$after = $estimator->estimateTokens($optimizedPrompt);
echo "优化前: " . $before . " tokens\n";
echo "优化后: " . $after . " tokens\n";
结构化提示
php
<?php
class StructuredPromptBuilder
{
    /**
     * Assemble a compact prompt from optional context lines, a mandatory
     * task line, and optional constraints — one section per line.
     * Empty context/constraints sections are omitted entirely.
     */
    public function buildPrompt(string $task, array $context = [], array $constraints = []): string
    {
        $sections = [];
        if ($context !== []) {
            $sections[] = "背景:" . implode(';', $context);
        }
        $sections[] = "任务:{$task}";
        if ($constraints !== []) {
            $sections[] = "要求:" . implode('、', $constraints);
        }
        return implode("\n", $sections);
    }
}
// Usage example
$builder = new StructuredPromptBuilder();
$prompt = $builder->buildPrompt(
    '解释PHP中的单例模式',
    ['面向PHP初学者'],
    ['代码示例', '不超过200字']
);
echo $prompt;
// Expected output:
// 背景:面向PHP初学者
// 任务:解释PHP中的单例模式
// 要求:代码示例、不超过200字
对话历史优化
滑动窗口策略
php
<?php
class SlidingWindowHistory
{
    // Maximum number of non-system messages to keep.
    private int $maxMessages;
    // Approximate token budget for the kept messages.
    private int $maxTokens;

    public function __construct(int $maxMessages = 10, int $maxTokens = 4000)
    {
        $this->maxMessages = $maxMessages;
        $this->maxTokens = $maxTokens;
    }

    /**
     * Keep the system message (if any) plus a sliding window of the most
     * recent conversation messages that fits both the message-count limit
     * and the token budget. Oldest messages are dropped first.
     */
    public function trimHistory(array $messages, TokenEstimator $estimator): array
    {
        $systemMessage = null;
        $conversation = [];
        foreach ($messages as $message) {
            if ($message['role'] === 'system') {
                // A later system message overrides an earlier one.
                $systemMessage = $message;
            } else {
                $conversation[] = $message;
            }
        }

        // Enforce the count limit by keeping only the newest entries.
        if (count($conversation) > $this->maxMessages) {
            $conversation = array_slice($conversation, -$this->maxMessages);
        }

        // Drop oldest messages until the token estimate fits the budget.
        while ($this->estimateTotalTokens($conversation, $estimator) > $this->maxTokens) {
            array_shift($conversation);
        }

        return $systemMessage !== null
            ? array_merge([$systemMessage], $conversation)
            : $conversation;
    }

    // Delegate token counting to the shared estimator.
    private function estimateTotalTokens(array $messages, TokenEstimator $estimator): int
    {
        return $estimator->estimateMessagesTokens($messages);
    }
}
摘要压缩策略
php
<?php
class SummaryCompressor
{
    // Chat client with a chat(array $messages, array $options) method.
    // NOTE(review): never initialized here — presumably injected elsewhere; confirm.
    private $client;

    /**
     * Compress a conversation history into a short summary by asking the
     * model itself, with the summary capped at roughly $targetTokens.
     */
    public function compressHistory(array $messages, int $targetTokens = 500): string
    {
        $historyText = $this->messagesToText($messages);
        $summaryPrompt = "请将以下对话历史压缩为简洁的摘要,保留关键信息:\n\n{$historyText}";
        $response = $this->client->chat(
            [['role' => 'user', 'content' => $summaryPrompt]],
            ['max_tokens' => $targetTokens]
        );
        return $response['choices'][0]['message']['content'];
    }

    // Render messages as "role:content" lines for the summary prompt.
    private function messagesToText(array $messages): string
    {
        $lines = '';
        foreach ($messages as $message) {
            $label = $message['role'] === 'user' ? '用户' : 'AI';
            $lines .= "{$label}:{$message['content']}\n";
        }
        return $lines;
    }
}
输出控制
限制输出长度
php
<?php
class OutputController
{
    // Chat client (project-provided; never initialized in this snippet).
    private $client;

    /**
     * Ask for a short answer, constraining both the instruction (word
     * limit in the system prompt) and the hard max_tokens cap.
     * The cap of words*2 is a rough allowance for CJK text.
     */
    public function getShortAnswer(string $question, int $maxWords = 50): string
    {
        $messages = [
            ['role' => 'system', 'content' => "请用不超过{$maxWords}字回答问题。"],
            ['role' => 'user', 'content' => $question],
        ];
        $response = $this->client->chat($messages, ['max_tokens' => $maxWords * 2]);
        return $response['choices'][0]['message']['content'];
    }

    /**
     * Request output in a fixed format (json/list/table) via a system
     * instruction; unknown formats fall back to an empty instruction.
     */
    public function getStructuredOutput(string $prompt, string $format = 'json'): string
    {
        $formatInstructions = [
            'json' => '请以JSON格式输出,不要包含其他内容。',
            'list' => '请以列表格式输出,每行一项。',
            'table' => '请以表格格式输出。',
        ];
        $messages = [
            ['role' => 'system', 'content' => $formatInstructions[$format] ?? ''],
            ['role' => 'user', 'content' => $prompt],
        ];
        $response = $this->client->chat($messages);
        return $response['choices'][0]['message']['content'];
    }
}
批量处理
php
<?php
class BatchProcessor
{
    // Chat client with a chat(array $messages) method.
    private $client;

    /**
     * FIX: the class had no constructor, yet the usage example calls
     * `new BatchProcessor($client)` — PHP silently ignored the argument
     * and $this->client stayed null. Optional default keeps
     * `new BatchProcessor()` working too.
     */
    public function __construct($client = null)
    {
        $this->client = $client;
    }

    /**
     * Process many items in a single request to save per-call prompt
     * overhead.
     *
     * FIX: $taskTemplate was accepted but never used; it is now included
     * in the prompt so the model knows what to do with each item.
     *
     * @param list<string> $items        Items to process.
     * @param string       $taskTemplate Short description of the per-item task.
     * @return array<int, string> Results keyed by original item index.
     */
    public function processBatch(array $items, string $taskTemplate): array
    {
        $batchPrompt = "请批量处理以下内容:\n\n";
        if ($taskTemplate !== '') {
            $batchPrompt = "任务:{$taskTemplate}\n" . $batchPrompt;
        }
        foreach ($items as $index => $item) {
            $batchPrompt .= ($index + 1) . ". {$item}\n";
        }
        $batchPrompt .= "\n请逐项处理,格式:序号. 结果";
        $result = $this->client->chat([
            ['role' => 'user', 'content' => $batchPrompt],
        ]);
        return $this->parseBatchResult($result['choices'][0]['message']['content']);
    }

    /**
     * Parse "N. result" lines back into a zero-indexed array; lines that
     * do not match the pattern are ignored.
     */
    private function parseBatchResult(string $result): array
    {
        $results = [];
        foreach (explode("\n", trim($result)) as $line) {
            if (preg_match('/^(\d+)\.\s*(.+)$/', trim($line), $matches)) {
                $results[(int)$matches[1] - 1] = $matches[2];
            }
        }
        return $results;
    }
}
// Usage example
$processor = new BatchProcessor($client);
$questions = ['什么是PHP?', '什么是Python?', '什么是Java?'];
$results = $processor->processBatch($questions, '简要解释');
缓存策略
响应缓存
php
<?php
class ResponseCache
{
    // Directory where cached responses are written as JSON files.
    private string $cacheDir;
    // Cache lifetime in seconds.
    private int $ttl;
    private TokenEstimator $estimator;

    public function __construct(string $cacheDir = '/tmp/ai_cache', int $ttl = 3600)
    {
        $this->cacheDir = $cacheDir;
        $this->ttl = $ttl;
        $this->estimator = new TokenEstimator();
        // FIX: the original ignored mkdir() failure and would later write
        // into a nonexistent directory. Re-check is_dir() to tolerate a
        // concurrent process creating the directory first.
        if (!is_dir($cacheDir) && !mkdir($cacheDir, 0755, true) && !is_dir($cacheDir)) {
            throw new \RuntimeException("Cannot create cache directory: {$cacheDir}");
        }
    }

    /**
     * Return the cached response for $messages if still fresh, otherwise
     * invoke $apiCall, cache its result, and return it.
     *
     * Adds bookkeeping keys to the returned array:
     * - from_cache:   whether the response came from the cache
     * - tokens_saved: estimated tokens saved (cache hits only)
     */
    public function getOrSet(array $messages, callable $apiCall): array
    {
        $cacheFile = $this->cacheDir . '/' . $this->generateCacheKey($messages) . '.json';

        $cached = $this->readCacheFile($cacheFile);
        if ($cached !== null && time() - $cached['timestamp'] < $this->ttl) {
            $cached['data']['from_cache'] = true;
            $cached['data']['tokens_saved'] = $cached['tokens'];
            return $cached['data'];
        }

        $result = $apiCall();
        $tokens = $this->estimator->estimateMessagesTokens($messages)
            + $this->estimator->estimateTokens($result['choices'][0]['message']['content'] ?? '');
        file_put_contents($cacheFile, json_encode([
            'timestamp' => time(),
            'tokens' => $tokens,
            'data' => $result,
        ], JSON_UNESCAPED_UNICODE));
        $result['from_cache'] = false;
        return $result;
    }

    /**
     * Read and validate one cache file; returns null when the file is
     * missing, unreadable, or corrupt.
     *
     * FIX: the original assumed json_decode() always yielded a well-formed
     * array and crashed on a truncated/corrupt cache file.
     */
    private function readCacheFile(string $cacheFile): ?array
    {
        if (!file_exists($cacheFile)) {
            return null;
        }
        $raw = file_get_contents($cacheFile);
        if ($raw === false) {
            return null;
        }
        $cache = json_decode($raw, true);
        if (!is_array($cache) || !isset($cache['timestamp'], $cache['data'])) {
            return null;
        }
        $cache['tokens'] = $cache['tokens'] ?? 0;
        return $cache;
    }

    // Deterministic key: identical message arrays map to the same file.
    private function generateCacheKey(array $messages): string
    {
        return md5(serialize($messages));
    }

    /**
     * Aggregate statistics over all cache files (valid and expired);
     * corrupt entries are skipped rather than crashing the scan.
     */
    public function getStats(): array
    {
        $files = glob($this->cacheDir . '/*.json') ?: [];
        $totalTokens = 0;
        $hitCount = 0;
        foreach ($files as $file) {
            $cache = json_decode((string) file_get_contents($file), true);
            if (!is_array($cache)) {
                continue;
            }
            $totalTokens += $cache['tokens'] ?? 0;
            if (time() - ($cache['timestamp'] ?? 0) < $this->ttl) {
                $hitCount++;
            }
        }
        return [
            'cache_files' => count($files),
            'valid_cache' => $hitCount,
            'tokens_saved' => $totalTokens,
        ];
    }
}
// Usage example
$cache = new ResponseCache();
$result = $cache->getOrSet(
    [['role' => 'user', 'content' => '什么是PHP?']],
    function () use ($client) {
        return $client->chat([['role' => 'user', 'content' => '什么是PHP?']]);
    }
);
if ($result['from_cache']) {
    echo "从缓存获取,节省 {$result['tokens_saved']} tokens\n";
}
模型选择策略
按任务选择模型
php
<?php
class ModelSelector
{
    /**
     * Task profiles: recommended models (best first) and an output-token
     * cap per task type.
     */
    private array $modelCapabilities = [
        'simple_qa' => [
            'recommended' => ['gpt-4o-mini', 'deepseek-chat', 'qwen-turbo'],
            'max_tokens' => 100,
        ],
        'code_generation' => [
            'recommended' => ['deepseek-chat', 'codestral-latest', 'gpt-4o'],
            'max_tokens' => 1000,
        ],
        'complex_reasoning' => [
            'recommended' => ['gpt-4o', 'claude-3-5-sonnet', 'deepseek-reasoner'],
            'max_tokens' => 2000,
        ],
        'long_document' => [
            'recommended' => ['claude-3-5-sonnet', 'qwen-long', 'moonshot-v1-128k'],
            'max_tokens' => 4000,
        ],
    ];

    // Prices per 1M tokens; models missing here sort as most expensive.
    private array $modelPricing = [
        'gpt-4o-mini' => ['input' => 0.15, 'output' => 0.6],
        'gpt-4o' => ['input' => 2.5, 'output' => 10],
        'deepseek-chat' => ['input' => 0.001, 'output' => 0.002],
        'deepseek-reasoner' => ['input' => 0.004, 'output' => 0.016],
        'claude-3-5-sonnet' => ['input' => 3, 'output' => 15],
    ];

    /**
     * Pick a model for the task type; with ['cost_priority' => true] the
     * recommended list is re-ordered by input price (cheapest first).
     * Unknown task types fall back to the simple_qa profile.
     */
    public function selectModel(string $taskType, array $constraints = []): string
    {
        $profile = $this->modelCapabilities[$taskType] ?? $this->modelCapabilities['simple_qa'];
        $candidates = $profile['recommended'];
        if (!empty($constraints['cost_priority'])) {
            usort(
                $candidates,
                fn (string $a, string $b): int =>
                    ($this->modelPricing[$a]['input'] ?? 999) <=> ($this->modelPricing[$b]['input'] ?? 999)
            );
        }
        return $candidates[0];
    }

    // Output-token cap for the task; 500 for unknown task types.
    public function getMaxTokens(string $taskType): int
    {
        return $this->modelCapabilities[$taskType]['max_tokens'] ?? 500;
    }
}
// Usage example
$selector = new ModelSelector();
$model = $selector->selectModel('simple_qa', ['cost_priority' => true]);
echo "推荐模型: {$model}\n";
常见问题答疑(FAQ)
Q1:如何准确计算Token?
回答:
php
<?php
// Option 1: use the official tokenizer library
// composer require tiktoken/tiktoken
// Option 2: estimate locally (approximate)
$estimator = new TokenEstimator();
$tokens = $estimator->estimateTokens($text);
// Option 3: read the usage field returned by the API (exact count)
$result = $client->chat($messages);
$actualTokens = $result['usage']['total_tokens'];
Q2:如何减少对话历史Token?
回答:
| 策略 | 说明 | 效果 |
|---|---|---|
| 滑动窗口 | 只保留最近N条消息 | 减少50-70% |
| 摘要压缩 | 将历史压缩为摘要 | 减少80-90% |
| 关键信息提取 | 只保留关键信息 | 减少60-80% |
Q3:缓存策略如何选择?
回答:
| 场景 | 缓存策略 | TTL |
|---|---|---|
| 静态知识问答 | 完全缓存 | 24小时+ |
| 新闻资讯 | 短期缓存 | 1小时 |
| 个性化对话 | 不缓存 | - |
| 模板生成 | 按模板缓存 | 永久 |
Q4:如何平衡质量和成本?
回答:
php
<?php
// 1. Match the model to task complexity
$simpleTask = 'gpt-4o-mini';
$complexTask = 'gpt-4o';
// 2. Cache to avoid repeated identical requests
$cache = new ResponseCache();
// 3. Cap the output length
$result = $client->chat($messages, ['max_tokens' => 500]);
// 4. Batch similar requests together
$batch = new BatchProcessor($client);
Q5:如何监控Token使用?
回答:
php
<?php
class TokenMonitor
{
    /** @var array<string, array{input:int, output:int, requests:int}> Per-model usage totals. */
    private array $usage = [];

    /**
     * Accumulate the token usage of one request against a model,
     * initializing the model's counters on first use.
     */
    public function record(string $model, int $inputTokens, int $outputTokens): void
    {
        $this->usage[$model] ??= ['input' => 0, 'output' => 0, 'requests' => 0];
        $this->usage[$model]['input'] += $inputTokens;
        $this->usage[$model]['output'] += $outputTokens;
        $this->usage[$model]['requests']++;
    }

    /**
     * Usage totals keyed by model name.
     */
    public function getReport(): array
    {
        return $this->usage;
    }
}
Q6:不同模型的Token计算有差异吗?
回答:
| 模型 | 分词器 | 中文Token比例 |
|---|---|---|
| GPT系列 | tiktoken | 约1.5字/Token |
| Claude | Claude分词器 | 约1.3字/Token |
| DeepSeek | 自定义 | 约1.5字/Token |
| 通义千问 | 自定义 | 约1.5字/Token |
实战练习
基础练习
练习1:实现一个Token计算器。
参考代码:
php
<?php
class SimpleTokenCounter
{
    /**
     * Crude token estimate: CJK chars count at ~2 chars/token, the
     * remaining bytes at ~4 chars/token (3 bytes per CJK char in UTF-8).
     */
    public function count(string $text): int
    {
        $cjk = preg_match_all('/[\x{4e00}-\x{9fff}]/u', $text);
        $rest = strlen($text) - $cjk * 3;
        return (int) ($cjk / 2 + $rest / 4);
    }
}
进阶练习
练习2:实现一个带缓存的智能客户端。
参考代码:
php
<?php
class CachedSmartClient
{
    // Underlying chat client (project-provided).
    private $client;
    private ResponseCache $cache;

    /**
     * Serve from the response cache when a fresh entry exists; otherwise
     * forward to the wrapped client and cache its result.
     */
    public function chat(array $messages): array
    {
        return $this->cache->getOrSet(
            $messages,
            fn (): array => $this->client->chat($messages)
        );
    }
}
挑战练习
练习3:实现一个成本优化的多模型路由器。
参考代码:
php
<?php
class CostOptimizedRouter
{
    /** @var array<string, object> Chat clients keyed by model name. */
    private array $clients;
    private ModelSelector $selector;
    private TokenEstimator $estimator;

    /**
     * Route a chat request to the cheapest suitable model for the task
     * and cap the output length per the task profile.
     */
    public function chat(array $messages, string $taskType): array
    {
        $model = $this->selector->selectModel($taskType, ['cost_priority' => true]);
        $maxTokens = $this->selector->getMaxTokens($taskType);
        $client = $this->clients[$model];
        return $client->chat($messages, ['max_tokens' => $maxTokens]);
    }
}
知识点总结
核心要点
- Token计算:理解Token与字符的换算关系
- Prompt优化:精简系统提示,结构化输出
- 历史优化:滑动窗口+摘要压缩
- 输出控制:限制长度,批量处理
- 缓存策略:响应缓存减少重复请求
- 模型选择:按任务选择合适的模型
易错点回顾
| 易错点 | 正确做法 |
|---|---|
| 不计算Token | 使用估算器或API返回值 |
| 不限制输出 | 设置max_tokens |
| 不使用缓存 | 对重复请求使用缓存 |
| 不选择模型 | 根据任务选择合适模型 |
拓展参考资料
官方文档
进阶学习路径
💡 记住:Token优化是降低AI应用成本的核心手段,精简Prompt、使用缓存、选择合适模型是三大关键策略。
