Appearance
Google Gemini API完全指南
Gemini是Google最强大的多模态AI模型,支持文本、图像、音频、视频的理解与生成
概述
Google Gemini是Google DeepMind开发的新一代多模态AI模型,能够无缝处理文本、图像、音频和视频。本教程将带你全面掌握Gemini API的使用方法。
为什么选择Gemini?
| 优势 | 说明 |
|---|---|
| 原生多模态 | 原生支持文本、图像、音频、视频 |
| 超长上下文 | 最高支持2M Token上下文 |
| 免费额度 | 提供慷慨的免费API调用额度 |
| Google生态 | 与Google Cloud深度集成 |
Gemini模型概览
Gemini模型家族:
Gemini 2.0系列(最新)
├── gemini-2.0-flash # 快速版,多模态
├── gemini-2.0-flash-lite # 轻量版,成本最低
└── gemini-2.0-pro-exp # 实验版,能力最强
Gemini 1.5系列(稳定)
├── gemini-1.5-pro # 专业版,最高2M上下文
├── gemini-1.5-flash # 快速版,性价比高
└── gemini-1.5-flash-8b # 轻量版,速度快
Gemini 1.0系列(经典)
└── gemini-1.0-pro # 标准版,稳定可靠
基本概念
API Key
php
<?php
// Create an API key in Google AI Studio:
// https://aistudio.google.com/app/apikey
// Keys have the following format:
// AIzaSyxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
// Read the key from the environment rather than hard-coding it in source.
$apiKey = getenv('GOOGLE_API_KEY');安全设置
Gemini提供独特的内容安全过滤机制:
php
<?php
// Safety thresholds to send in the request's `safetySettings` field.
// The REST API takes a list of {category, threshold} objects rather than a
// category => threshold map, so every harm category gets its own entry.
$safetySettings = [
    ['category' => 'HARM_CATEGORY_HARASSMENT', 'threshold' => 'BLOCK_MEDIUM_AND_ABOVE'],
    ['category' => 'HARM_CATEGORY_HATE_SPEECH', 'threshold' => 'BLOCK_MEDIUM_AND_ABOVE'],
    ['category' => 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'threshold' => 'BLOCK_MEDIUM_AND_ABOVE'],
    ['category' => 'HARM_CATEGORY_DANGEROUS_CONTENT', 'threshold' => 'BLOCK_MEDIUM_AND_ABOVE'],
];
环境准备
创建Gemini客户端
php
<?php
require 'vendor/autoload.php';
use GuzzleHttp\Client;
use GuzzleHttp\Exception\RequestException;
/**
 * Minimal client for the Gemini REST API.
 *
 * Wraps a Guzzle HTTP client and authenticates every request with the API key.
 */
class GeminiClient
{
    private $client;
    private $apiKey;
    private $baseUrl = 'https://generativelanguage.googleapis.com/v1beta';

    /**
     * @param string $apiKey API key used to authenticate requests.
     */
    public function __construct(string $apiKey)
    {
        $this->apiKey = $apiKey;
        $this->client = new Client([
            'base_uri' => $this->baseUrl,
            'timeout' => 60,
        ]);
    }

    /**
     * Send one text prompt and return the decoded JSON response.
     *
     * @param string $prompt         Text sent as the only part of the request.
     * @param string $model          Model name placed in the endpoint path.
     * @param array  $safetySettings Value for the `safetySettings` field; omitted when empty.
     *
     * @return array Decoded response body.
     *
     * @throws Exception When the HTTP request fails or the body is not a JSON document.
     */
    public function generateContent(
        string $prompt,
        string $model = 'gemini-1.5-flash',
        array $safetySettings = []
    ): array {
        // Use an absolute URL: a path starting with "/" is resolved against
        // base_uri per RFC 3986 and would replace its "/v1beta" segment.
        $url = "{$this->baseUrl}/models/{$model}:generateContent";

        $body = [
            'contents' => [
                [
                    'parts' => [
                        ['text' => $prompt],
                    ],
                ],
            ],
        ];
        if ($safetySettings !== []) {
            $body['safetySettings'] = $safetySettings;
        }

        try {
            $response = $this->client->post($url, [
                // Sending the key as a header keeps it out of URLs, and
                // therefore out of exception messages and access logs.
                'headers' => ['x-goog-api-key' => $this->apiKey],
                'json' => $body,
            ]);
        } catch (RequestException $e) {
            $errorBody = $e->getResponse()
                ? (string) $e->getResponse()->getBody()
                : $e->getMessage();
            // Chain the original exception so the transport details stay available.
            throw new Exception('Gemini API Error: ' . $errorBody, 0, $e);
        }

        $decoded = json_decode((string) $response->getBody(), true);
        if (!is_array($decoded)) {
            // Without this check a malformed body would surface as a TypeError
            // on the declared array return type.
            throw new Exception('Gemini API Error: response body is not valid JSON');
        }

        return $decoded;
    }
}
// Usage example
$apiKey = getenv('GOOGLE_API_KEY');
$client = new GeminiClient($apiKey);
$result = $client->generateContent('请用一句话介绍PHP语言');
// Print the text of the first part of the first candidate.
echo $result['candidates'][0]['content']['parts'][0]['text'];运行结果:
PHP是一种广泛使用的开源服务器端脚本语言,特别适合Web开发,可以嵌入HTML中执行。
多模态处理
图像理解
php
<?php
class GeminiClient
{
    // ... previous code ...

    /**
     * Ask a question about a single local image file.
     *
     * The file is sent inline as base64, followed by the question text.
     *
     * @param string $imagePath Path of the image file to read.
     * @param string $question  Question sent together with the image.
     * @param string $model     Model name placed in the endpoint path.
     *
     * @return string Text of the first part of the first candidate.
     *
     * @throws RuntimeException When the file cannot be read or the response carries no text.
     */
    public function analyzeImage(
        string $imagePath,
        string $question,
        string $model = 'gemini-1.5-flash'
    ): string {
        // Fail before any network traffic if the file is missing or unreadable.
        $raw = is_file($imagePath) ? file_get_contents($imagePath) : false;
        if ($raw === false) {
            throw new RuntimeException("Cannot read image file: {$imagePath}");
        }

        $body = [
            'contents' => [
                [
                    'parts' => [
                        [
                            'inline_data' => [
                                'mime_type' => $this->getMimeType($imagePath),
                                'data' => base64_encode($raw),
                            ],
                        ],
                        ['text' => $question],
                    ],
                ],
            ],
        ];

        // Absolute URL: a path starting with "/" would replace the "/v1beta"
        // segment of the client's base_uri when Guzzle resolves it.
        $url = "{$this->baseUrl}/models/{$model}:generateContent";
        $response = $this->client->post($url, [
            'headers' => ['x-goog-api-key' => $this->apiKey],
            'json' => $body,
        ]);
        $result = json_decode((string) $response->getBody(), true);

        $text = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($text)) {
            // A blocked prompt or filtered candidate has no text part.
            $reason = $result['promptFeedback']['blockReason']
                ?? ($result['candidates'][0]['finishReason'] ?? 'unknown');
            throw new RuntimeException("Gemini returned no text (reason: {$reason})");
        }

        return $text;
    }

    /**
     * Map a file extension to the MIME type sent with the image.
     *
     * @param string $path File path; only its extension is inspected (case-insensitive).
     *
     * @return string Known image MIME type, or `application/octet-stream` for any other extension.
     */
    private function getMimeType(string $path): string
    {
        $types = [
            'jpg' => 'image/jpeg',
            'jpeg' => 'image/jpeg',
            'png' => 'image/png',
            'gif' => 'image/gif',
            'webp' => 'image/webp',
        ];
        $ext = strtolower(pathinfo($path, PATHINFO_EXTENSION));

        return $types[$ext] ?? 'application/octet-stream';
    }
}
// Usage example
$description = $client->analyzeImage('photo.jpg', '请描述这张图片的内容');
echo $description;多图像处理
php
<?php
class GeminiClient
{
    // ... previous code ...

    /**
     * Ask one question about several local image files in a single request.
     *
     * Every image becomes an inline base64 part, in the given order, and the
     * question is appended as the last part.
     *
     * @param array  $imagePaths Paths of the image files to read.
     * @param string $question   Question sent after the images.
     * @param string $model      Model name placed in the endpoint path.
     *
     * @return string Text of the first part of the first candidate.
     *
     * @throws RuntimeException When a file cannot be read or the response carries no text.
     */
    public function compareImages(
        array $imagePaths,
        string $question,
        string $model = 'gemini-1.5-flash'
    ): string {
        $parts = [];
        foreach ($imagePaths as $path) {
            // Check every read so a missing file is reported by name instead of
            // being sent as an empty image.
            $raw = is_file($path) ? file_get_contents($path) : false;
            if ($raw === false) {
                throw new RuntimeException("Cannot read image file: {$path}");
            }
            $parts[] = [
                'inline_data' => [
                    'mime_type' => $this->getMimeType($path),
                    'data' => base64_encode($raw),
                ],
            ];
        }
        $parts[] = ['text' => $question];

        // Absolute URL: a path starting with "/" would replace the "/v1beta"
        // segment of the client's base_uri when Guzzle resolves it.
        $url = "{$this->baseUrl}/models/{$model}:generateContent";
        $response = $this->client->post($url, [
            'headers' => ['x-goog-api-key' => $this->apiKey],
            'json' => ['contents' => [['parts' => $parts]]],
        ]);
        $result = json_decode((string) $response->getBody(), true);

        $text = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($text)) {
            $reason = $result['promptFeedback']['blockReason']
                ?? ($result['candidates'][0]['finishReason'] ?? 'unknown');
            throw new RuntimeException("Gemini returned no text (reason: {$reason})");
        }

        return $text;
    }
}
// Usage example
$comparison = $client->compareImages(
['design1.png', 'design2.png'],
'请比较这两个设计方案的优缺点'
);
echo $comparison;高级参数配置
完整参数示例
php
<?php
class GeminiClient
{
    // ... previous code ...

    /**
     * Send a text prompt with generation, safety and system-instruction options.
     *
     * Recognised keys in $options:
     *  - generationConfig: used verbatim when present; otherwise one is built from
     *    temperature (0.7), topK (40), topP (0.95), maxOutputTokens (8192) and,
     *    when given, stopSequences.
     *  - safetySettings: forwarded when non-empty.
     *  - systemInstruction: string wrapped into a single text part when non-empty.
     *
     * @param string $prompt  Text sent as the only part of the request.
     * @param string $model   Model name placed in the endpoint path.
     * @param array  $options Option map described above.
     *
     * @return array Decoded response body.
     *
     * @throws RuntimeException When the response body is not a JSON document.
     */
    public function generateContentAdvanced(
        string $prompt,
        string $model = 'gemini-1.5-flash',
        array $options = []
    ): array {
        // Absolute URL: a path starting with "/" would replace the "/v1beta"
        // segment of the client's base_uri when Guzzle resolves it.
        $url = "{$this->baseUrl}/models/{$model}:generateContent";
        $response = $this->client->post($url, [
            'headers' => ['x-goog-api-key' => $this->apiKey],
            'json' => $this->buildAdvancedBody($prompt, $options),
        ]);

        $decoded = json_decode((string) $response->getBody(), true);
        if (!is_array($decoded)) {
            throw new RuntimeException('Gemini API Error: response body is not valid JSON');
        }

        return $decoded;
    }

    /**
     * Build the JSON request body for generateContentAdvanced().
     */
    private function buildAdvancedBody(string $prompt, array $options): array
    {
        $body = [
            'contents' => [
                ['parts' => [['text' => $prompt]]],
            ],
        ];

        if (isset($options['generationConfig'])) {
            $body['generationConfig'] = $options['generationConfig'];
        } else {
            $config = [
                'temperature' => $options['temperature'] ?? 0.7,
                'topK' => $options['topK'] ?? 40,
                'topP' => $options['topP'] ?? 0.95,
                'maxOutputTokens' => $options['maxOutputTokens'] ?? 8192,
            ];
            // stopSequences has no default, so it is only sent when the caller supplies it.
            if (isset($options['stopSequences'])) {
                $config['stopSequences'] = array_values((array) $options['stopSequences']);
            }
            $body['generationConfig'] = $config;
        }

        if (!empty($options['safetySettings'])) {
            $body['safetySettings'] = $options['safetySettings'];
        }

        if (!empty($options['systemInstruction'])) {
            $body['systemInstruction'] = [
                'parts' => [['text' => $options['systemInstruction']]],
            ];
        }

        return $body;
    }
}
// Usage example: flat options are turned into a generationConfig by the method.
$result = $client->generateContentAdvanced(
'写一首关于人工智能的诗',
'gemini-1.5-flash',
[
'temperature' => 0.9,
'maxOutputTokens' => 500,
'systemInstruction' => '你是一位诗人,擅长写现代诗',
]
);
echo $result['candidates'][0]['content']['parts'][0]['text'];参数详解
| 参数 | 范围 | 默认值 | 说明 |
|---|---|---|---|
| temperature | 0-2 | 0.7 | 控制随机性 |
| topK | 1-100 | 40 | 只考虑前K个候选词 |
| topP | 0-1 | 0.95 | 核采样参数 |
| maxOutputTokens | 1-模型上限 | 8192 | 最大输出Token |
| stopSequences | 字符串数组 | - | 停止序列 |
流式响应处理
php
<?php
class GeminiClient
{
    // ... previous code ...

    /**
     * Stream a reply and yield its text chunk by chunk.
     *
     * The endpoint is called with `alt=sse`; each `data:` line of the response
     * is decoded and the text of its first candidate part is yielded.
     *
     * @param string $prompt Text sent as the only part of the request.
     * @param string $model  Model name placed in the endpoint path.
     *
     * @return Generator Yields one string per event that carries text.
     */
    public function generateContentStream(
        string $prompt,
        string $model = 'gemini-1.5-flash'
    ): Generator {
        // Absolute URL: a path starting with "/" would replace the "/v1beta"
        // segment of the client's base_uri when Guzzle resolves it.
        $url = "{$this->baseUrl}/models/{$model}:streamGenerateContent";
        $payload = [
            'contents' => [
                ['parts' => [['text' => $prompt]]],
            ],
        ];
        $response = $this->client->post($url, [
            'headers' => ['x-goog-api-key' => $this->apiKey],
            'query' => ['alt' => 'sse'],
            'json' => $payload,
            'stream' => true,
        ]);

        $stream = $response->getBody();
        $buffer = '';
        while (!$stream->eof()) {
            $buffer .= $stream->read(1024);
            // A read may end mid-line, so only complete lines are consumed here.
            while (($pos = strpos($buffer, "\n")) !== false) {
                $line = substr($buffer, 0, $pos);
                $buffer = (string) substr($buffer, $pos + 1);
                $text = $this->textFromSseLine($line);
                if ($text !== null) {
                    yield $text;
                }
            }
        }

        // The last event may arrive without a trailing newline; process what is left.
        $text = $this->textFromSseLine($buffer);
        if ($text !== null) {
            yield $text;
        }
    }

    /**
     * Extract the text chunk from one server-sent-event line.
     *
     * @return string|null The text, or null when the line is not a `data:` line
     *                     or carries no text part.
     */
    private function textFromSseLine(string $line): ?string
    {
        $line = trim($line);
        if (strncmp($line, 'data:', 5) !== 0) {
            return null;
        }
        $data = json_decode(ltrim(substr($line, 5)), true);
        $text = $data['candidates'][0]['content']['parts'][0]['text'] ?? null;

        return is_string($text) ? $text : null;
    }
}
// Usage example
echo "Gemini回复:";
foreach ($client->generateContentStream('讲一个程序员笑话') as $chunk) {
echo $chunk;
// Flush after every chunk so the text is written out as it is received.
flush();
}多轮对话实现
php
<?php
/**
 * Multi-turn conversation on top of GeminiClient.
 *
 * Keeps the alternating user/model history in memory and resends it with
 * every new message.
 */
class GeminiChatSession
{
    private GeminiClient $client;
    private array $history = [];
    private string $model;
    private string $systemInstruction;

    /**
     * @param GeminiClient $client            Client whose HTTP connection and API key are reused.
     * @param string       $model             Model name placed in the endpoint path.
     * @param string       $systemInstruction Optional system instruction sent with every turn.
     */
    public function __construct(
        GeminiClient $client,
        string $model = 'gemini-1.5-flash',
        string $systemInstruction = ''
    ) {
        $this->client = $client;
        $this->model = $model;
        $this->systemInstruction = $systemInstruction;
    }

    /**
     * Send a user message together with the stored history and return the reply.
     *
     * The history is only extended after a reply with text was received, so a
     * failed turn can be retried without leaving a dangling user message.
     *
     * @param string $message User message for this turn.
     *
     * @return string Text of the first part of the first candidate.
     *
     * @throws RuntimeException When the response carries no text.
     */
    public function chat(string $message): string
    {
        $contents = [];
        foreach ($this->history as $turn) {
            $contents[] = [
                'role' => $turn['role'],
                'parts' => [['text' => $turn['content']]],
            ];
        }
        $contents[] = [
            'role' => 'user',
            'parts' => [['text' => $message]],
        ];

        $body = ['contents' => $contents];
        if ($this->systemInstruction !== '') {
            $body['systemInstruction'] = [
                'parts' => [['text' => $this->systemInstruction]],
            ];
        }

        $result = $this->send($body);
        $reply = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($reply)) {
            $reason = $result['promptFeedback']['blockReason']
                ?? ($result['candidates'][0]['finishReason'] ?? 'unknown');
            throw new RuntimeException("Gemini returned no text (reason: {$reason})");
        }

        $this->history[] = ['role' => 'user', 'content' => $message];
        $this->history[] = ['role' => 'model', 'content' => $reply];

        return $reply;
    }

    /**
     * Forget all previous turns.
     */
    public function clearHistory(): void
    {
        $this->history = [];
    }

    /**
     * POST a request body to the generateContent endpoint of the configured model.
     *
     * GeminiClient declares its HTTP client, API key and base URL as private, so
     * reading them directly from this class is a fatal error. The closure below
     * is executed in GeminiClient's scope to reach them.
     * NOTE(review): prefer adding a public request method to GeminiClient and
     * calling it here instead.
     */
    private function send(array $body): array
    {
        $model = $this->model;
        $request = function () use ($model, $body) {
            // Inside this closure $this is the GeminiClient instance.
            return $this->client->post(
                // Absolute URL, so the "/v1beta" segment of base_uri is not lost.
                "{$this->baseUrl}/models/{$model}:generateContent",
                [
                    'headers' => ['x-goog-api-key' => $this->apiKey],
                    'json' => $body,
                ]
            );
        };
        $response = $request->call($this->client);
        $decoded = json_decode((string) $response->getBody(), true);

        return is_array($decoded) ? $decoded : [];
    }
}
常见错误与踩坑点
错误1:忽略安全设置
php
<?php
// ❌ Wrong: the result is used without checking for safety blocking
$result = $client->generateContent($prompt);
// ✅ Right: check whether the prompt was blocked before reading the text
$result = $client->generateContent($prompt);
if (isset($result['promptFeedback']['blockReason'])) {
echo '内容被安全过滤阻止:' . $result['promptFeedback']['blockReason'];
} else {
echo $result['candidates'][0]['content']['parts'][0]['text'];
}错误2:图像格式不支持
php
<?php
// ❌ Wrong: encode a file whose format is not in the supported list as-is
$imageData = base64_encode(file_get_contents('image.bmp'));
// ✅ Right: convert it to a supported format first
/**
 * Return the base64 payload of an image, converting it to PNG when needed.
 *
 * Files whose extension is jpg, jpeg, png, gif or webp are encoded unchanged.
 * Any other file is decoded with GD and re-encoded as PNG, so the caller has
 * to send `image/png` as the MIME type for converted images.
 *
 * @param string $imagePath Path of the image file to read.
 *
 * @return string Base64-encoded image bytes.
 *
 * @throws RuntimeException When the file cannot be read or GD cannot decode it.
 */
function convertToSupportedFormat(string $imagePath): string
{
    $raw = is_file($imagePath) ? file_get_contents($imagePath) : false;
    if ($raw === false) {
        throw new RuntimeException("Cannot read image file: {$imagePath}");
    }

    $supportedFormats = ['jpg', 'jpeg', 'png', 'gif', 'webp'];
    $ext = strtolower(pathinfo($imagePath, PATHINFO_EXTENSION));
    if (in_array($ext, $supportedFormats, true)) {
        return base64_encode($raw);
    }

    $image = imagecreatefromstring($raw);
    if ($image === false) {
        throw new RuntimeException("Cannot decode image file: {$imagePath}");
    }

    // imagepng() writes to the output stream, so capture it in a buffer and
    // always release both the buffer and the image, even if encoding fails.
    ob_start();
    try {
        imagepng($image);
        $png = (string) ob_get_contents();
    } finally {
        ob_end_clean();
        imagedestroy($image);
    }

    return base64_encode($png);
}
错误3:超出Token限制
php
<?php
// ❌ Wrong: send an overly long text as-is
$longText = file_get_contents('very_large_file.txt');
$result = $client->generateContent($longText);
// ✅ Right: use a long-context model, or split the text into chunks
$result = $client->generateContentAdvanced(
$longText,
'gemini-1.5-pro', // long-context model
['maxOutputTokens' => 8192]
);常见应用场景
场景1:图像内容分析
php
<?php
/**
 * Image analysis helpers built on GeminiClient::analyzeImage().
 */
class ImageAnalyzer
{
    private GeminiClient $client;

    /**
     * @param GeminiClient $client Client used for all image requests.
     */
    public function __construct(GeminiClient $client)
    {
        // Without this assignment the typed property stays uninitialised and
        // every method fails on first access.
        $this->client = $client;
    }

    /**
     * Ask for a detailed description of the image.
     */
    public function describeImage(string $imagePath): string
    {
        return $this->client->analyzeImage(
            $imagePath,
            '请详细描述这张图片的内容,包括主体、背景、色彩、构图等方面。'
        );
    }

    /**
     * Ask for all text visible in the image.
     */
    public function extractText(string $imagePath): string
    {
        return $this->client->analyzeImage(
            $imagePath,
            '请识别并提取图片中的所有文字内容。'
        );
    }

    /**
     * Ask for the objects in the image and decode the JSON reply.
     *
     * @return array Decoded JSON array as requested by the prompt.
     *
     * @throws RuntimeException When the reply is not valid JSON.
     */
    public function detectObjects(string $imagePath): array
    {
        $reply = $this->client->analyzeImage(
            $imagePath,
            '请识别图片中的所有物体,以JSON数组格式返回:[{"object": "物体名", "position": "位置描述", "confidence": "置信度"}]'
        );

        return $this->decodeJson($reply);
    }

    /**
     * Decode a JSON reply, tolerating a surrounding Markdown code fence.
     */
    private function decodeJson(string $text): array
    {
        // The reply may arrive wrapped in ``` or ```json fences; strip them first.
        $clean = preg_replace('/^\s*```(?:json)?\s*|\s*```\s*$/i', '', $text);
        $decoded = json_decode(trim((string) $clean), true);
        if (!is_array($decoded)) {
            throw new RuntimeException('Gemini reply is not valid JSON: ' . json_last_error_msg());
        }

        return $decoded;
    }
}
场景2:文档智能处理
php
<?php
/**
 * Text-document helpers (summary, key points, translation) built on GeminiClient.
 */
class DocumentProcessor
{
    private GeminiClient $client;

    /**
     * @param GeminiClient $client Client used for all requests.
     */
    public function __construct(GeminiClient $client)
    {
        // Without this assignment the typed property stays uninitialised.
        $this->client = $client;
    }

    /**
     * Ask for a concise summary of the document.
     */
    public function summarizeDocument(string $document): string
    {
        $prompt = "请为以下文档生成一个简洁的摘要:\n\n{$document}";

        return $this->replyText($this->client->generateContent($prompt));
    }

    /**
     * Ask for the key points of the document and decode the JSON reply.
     *
     * @throws RuntimeException When the reply is not valid JSON.
     */
    public function extractKeyPoints(string $document): array
    {
        $prompt = "请从以下文档中提取关键要点,以JSON数组格式返回:\n\n{$document}";

        return $this->decodeJson($this->replyText($this->client->generateContent($prompt)));
    }

    /**
     * Ask for a translation of the document into the given language.
     */
    public function translateDocument(string $document, string $targetLang): string
    {
        $prompt = "请将以下文档翻译成{$targetLang}:\n\n{$document}";

        return $this->replyText($this->client->generateContent($prompt));
    }

    /**
     * Return the text of the first candidate, or fail when there is none.
     */
    private function replyText(array $result): string
    {
        $text = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($text)) {
            throw new RuntimeException('Gemini response contained no text');
        }

        return $text;
    }

    /**
     * Decode a JSON reply, tolerating a surrounding Markdown code fence.
     */
    private function decodeJson(string $text): array
    {
        $clean = preg_replace('/^\s*```(?:json)?\s*|\s*```\s*$/i', '', $text);
        $decoded = json_decode(trim((string) $clean), true);
        if (!is_array($decoded)) {
            throw new RuntimeException('Gemini reply is not valid JSON: ' . json_last_error_msg());
        }

        return $decoded;
    }
}
场景3:代码助手
php
<?php
/**
 * Code generation, explanation and debugging prompts built on GeminiClient.
 */
class CodeAssistant
{
    private GeminiClient $client;

    /**
     * @param GeminiClient $client Client used for all requests.
     */
    public function __construct(GeminiClient $client)
    {
        // Without this assignment the typed property stays uninitialised.
        $this->client = $client;
    }

    /**
     * Ask for code in the given language that matches a description.
     */
    public function generateCode(string $description, string $language = 'PHP'): string
    {
        return $this->ask("请根据以下描述生成{$language}代码:\n\n{$description}");
    }

    /**
     * Ask for an explanation of what the code does and how.
     */
    public function explainCode(string $code): string
    {
        return $this->ask("请解释以下代码的功能和实现原理:\n\n```\n{$code}\n```");
    }

    /**
     * Ask for the cause of an error and a proposed fix.
     */
    public function debugCode(string $code, string $error): string
    {
        return $this->ask(
            "以下代码出现错误,请分析原因并提供修复方案:\n\n代码:\n```\n{$code}\n```\n\n错误信息:\n{$error}"
        );
    }

    /**
     * Send a prompt and return the text of the first candidate.
     *
     * @throws RuntimeException When the response carries no text.
     */
    private function ask(string $prompt): string
    {
        $result = $this->client->generateContent($prompt);
        $text = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($text)) {
            throw new RuntimeException('Gemini response contained no text');
        }

        return $text;
    }
}
场景4:内容创作
php
<?php
/**
 * Article, headline and rewriting prompts built on GeminiClient.
 */
class ContentCreator
{
    private GeminiClient $client;

    /**
     * @param GeminiClient $client Client used for all requests.
     */
    public function __construct(GeminiClient $client)
    {
        // Without this assignment the typed property stays uninitialised.
        $this->client = $client;
    }

    /**
     * Ask for an article on a topic in the given style and approximate length.
     */
    public function writeArticle(string $topic, string $style = '专业', int $wordCount = 800): string
    {
        // The quotes around the topic are escaped: unescaped they terminate
        // the PHP string and the file no longer parses.
        $prompt = "请以{$style}的风格,写一篇关于\"{$topic}\"的文章,字数约{$wordCount}字。";

        return $this->replyText($this->client->generateContent($prompt));
    }

    /**
     * Ask for headlines on a topic and decode the JSON reply.
     *
     * @throws RuntimeException When the reply is not valid JSON.
     */
    public function generateHeadlines(string $topic, int $count = 5): array
    {
        $prompt = "请为\"{$topic}\"生成{$count}个吸引人的标题,以JSON数组格式返回。";

        return $this->decodeJson($this->replyText($this->client->generateContent($prompt)));
    }

    /**
     * Ask for the content to be rewritten in another style.
     */
    public function rewriteContent(string $content, string $style): string
    {
        $prompt = "请将以下内容改写成{$style}风格:\n\n{$content}";

        return $this->replyText($this->client->generateContent($prompt));
    }

    /**
     * Return the text of the first candidate, or fail when there is none.
     */
    private function replyText(array $result): string
    {
        $text = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($text)) {
            throw new RuntimeException('Gemini response contained no text');
        }

        return $text;
    }

    /**
     * Decode a JSON reply, tolerating a surrounding Markdown code fence.
     */
    private function decodeJson(string $text): array
    {
        $clean = preg_replace('/^\s*```(?:json)?\s*|\s*```\s*$/i', '', $text);
        $decoded = json_decode(trim((string) $clean), true);
        if (!is_array($decoded)) {
            throw new RuntimeException('Gemini reply is not valid JSON: ' . json_last_error_msg());
        }

        return $decoded;
    }
}
场景5:智能问答
php
<?php
/**
 * Question answering with an optional fixed background context.
 */
class QASystem
{
    private GeminiClient $client;
    private string $context;

    /**
     * @param GeminiClient $client  Client used for all requests.
     * @param string       $context Background text prepended to every question; empty for none.
     */
    public function __construct(GeminiClient $client, string $context = '')
    {
        $this->client = $client;
        $this->context = $context;
    }

    /**
     * Answer a question, using the background context when one was given.
     */
    public function answer(string $question): string
    {
        // Compare against '' explicitly: a truthiness test would also drop the context "0".
        $prompt = $this->context !== ''
            ? "背景信息:\n{$this->context}\n\n问题:{$question}\n\n请根据背景信息回答问题:"
            : $question;

        return $this->ask($prompt);
    }

    /**
     * Answer a multiple-choice question.
     *
     * @param string $question Question text.
     * @param array  $options  Answer options; they are numbered 1..n in iteration order.
     */
    public function answerWithOptions(string $question, array $options): string
    {
        // Number by position, not by array key, so string or sparse keys still
        // produce a clean 1..n list.
        $lines = [];
        $number = 1;
        foreach ($options as $option) {
            $lines[] = "{$number}. {$option}";
            $number++;
        }
        $optionsStr = implode("\n", $lines);
        $prompt = "问题:{$question}\n\n选项:\n{$optionsStr}\n\n请选择正确答案并解释原因。";

        return $this->ask($prompt);
    }

    /**
     * Send a prompt and return the text of the first candidate.
     *
     * @throws RuntimeException When the response carries no text.
     */
    private function ask(string $prompt): string
    {
        $result = $this->client->generateContent($prompt);
        $text = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($text)) {
            throw new RuntimeException('Gemini response contained no text');
        }

        return $text;
    }
}
企业级进阶应用场景
场景1:构建智能文档检索系统
php
<?php
/**
 * In-memory document store that asks Gemini to rank documents for a query.
 *
 * Every search sends the full content of all stored documents in the prompt.
 */
class DocumentRetrievalSystem
{
    private GeminiClient $client;
    private array $documents = [];

    /**
     * @param GeminiClient $client Client used for search requests.
     */
    public function __construct(GeminiClient $client)
    {
        // Without this assignment the typed property stays uninitialised.
        $this->client = $client;
    }

    /**
     * Store a document, replacing any existing one with the same id.
     */
    public function addDocument(string $id, string $content): void
    {
        $this->documents[$id] = $content;
    }

    /**
     * Ask for the documents most relevant to a query.
     *
     * @param string $query User query.
     * @param int    $topK  Number of documents requested in the prompt.
     *
     * @return array Decoded JSON reply; the prompt requests a `results` list.
     *
     * @throws RuntimeException When the response has no text or is not valid JSON.
     */
    public function search(string $query, int $topK = 3): array
    {
        // Nothing to rank: skip the API call and return the requested shape.
        if ($this->documents === []) {
            return ['results' => []];
        }

        $context = '';
        foreach ($this->documents as $id => $content) {
            $context .= "【文档ID: {$id}】\n{$content}\n\n";
        }

        $prompt = <<<PROMPT
        以下是一些文档内容:
        {$context}
        请根据用户查询,找出最相关的{$topK}个文档,并说明相关性。
        用户查询:{$query}
        请以JSON格式返回:
        {
        "results": [
        {"doc_id": "文档ID", "relevance": "相关性说明", "score": 0-1}
        ]
        }
        PROMPT;

        $result = $this->client->generateContent($prompt);
        $text = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($text)) {
            throw new RuntimeException('Gemini response contained no text');
        }

        // The reply may arrive wrapped in ``` or ```json fences; strip them first.
        $clean = preg_replace('/^\s*```(?:json)?\s*|\s*```\s*$/i', '', $text);
        $decoded = json_decode(trim((string) $clean), true);
        if (!is_array($decoded)) {
            throw new RuntimeException('Gemini reply is not valid JSON: ' . json_last_error_msg());
        }

        return $decoded;
    }
}
场景2:构建多模态内容审核系统
php
<?php
/**
 * Text and image moderation prompts built on GeminiClient.
 */
class ContentModerationSystem
{
    private GeminiClient $client;

    /**
     * @param GeminiClient $client Client used for all requests.
     */
    public function __construct(GeminiClient $client)
    {
        // Without this assignment the typed property stays uninitialised.
        $this->client = $client;
    }

    /**
     * Ask for a moderation verdict on a text and decode the JSON reply.
     *
     * @return array Decoded JSON reply; the prompt requests the keys is_safe,
     *               categories, confidence and suggestion.
     *
     * @throws RuntimeException When the response has no text or is not valid JSON.
     */
    public function moderateText(string $text): array
    {
        $prompt = <<<PROMPT
        请审核以下文本内容,检查是否包含:
        1. 违法违规内容
        2. 不良信息
        3. 敏感话题
        4. 广告推广
        文本内容:
        {$text}
        请以JSON格式返回审核结果:
        {
        "is_safe": true/false,
        "categories": ["违规类别"],
        "confidence": 0-1,
        "suggestion": "处理建议"
        }
        PROMPT;

        $result = $this->client->generateContent($prompt);
        $reply = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($reply)) {
            throw new RuntimeException('Gemini response contained no text');
        }

        return $this->decodeJson($reply);
    }

    /**
     * Ask for a moderation verdict on an image and decode the JSON reply.
     *
     * @throws RuntimeException When the reply is not valid JSON.
     */
    public function moderateImage(string $imagePath): array
    {
        $reply = $this->client->analyzeImage(
            $imagePath,
            '请审核这张图片,检查是否包含违规内容,以JSON格式返回审核结果。'
        );

        return $this->decodeJson($reply);
    }

    /**
     * Decode a JSON reply, tolerating a surrounding Markdown code fence.
     */
    private function decodeJson(string $text): array
    {
        $clean = preg_replace('/^\s*```(?:json)?\s*|\s*```\s*$/i', '', $text);
        $decoded = json_decode(trim((string) $clean), true);
        if (!is_array($decoded)) {
            throw new RuntimeException('Gemini reply is not valid JSON: ' . json_last_error_msg());
        }

        return $decoded;
    }
}
常见问题答疑(FAQ)
Q1:Gemini免费额度是多少?
回答:免费额度和速率限制会随官方政策调整,下表数值仅供参考,请以官方定价页面(https://ai.google.dev/pricing)为准。
| 模型 | 免费额度 | 速率限制 |
|---|---|---|
| gemini-1.5-flash | 150万Token/天 | 15 RPM |
| gemini-1.5-pro | 3万Token/天 | 2 RPM |
| gemini-2.0-flash | 100万Token/天 | 10 RPM |
Q2:如何处理安全过滤?
回答:
php
<?php
/**
 * Turn a generateContent response into the text to show to the user.
 *
 * Checks, in order: a blocked prompt, a candidate rated HIGH in any safety
 * category, a candidate without text, and finally returns the reply text.
 *
 * @param array $result Decoded generateContent response.
 *
 * @return string Reply text, or a message describing why there is none.
 */
function handleSafetyFilter(array $result): string
{
    if (isset($result['promptFeedback']['blockReason'])) {
        $reason = $result['promptFeedback']['blockReason'];
        return "内容被安全过滤阻止,原因:{$reason}";
    }

    $candidate = $result['candidates'][0] ?? [];
    foreach ($candidate['safetyRatings'] ?? [] as $rating) {
        if (($rating['probability'] ?? null) === 'HIGH') {
            $category = $rating['category'] ?? 'UNKNOWN';
            return "内容可能包含不安全内容:{$category}";
        }
    }

    $text = $candidate['content']['parts'][0]['text'] ?? null;
    if (!is_string($text)) {
        // A filtered or empty candidate has no text part; report the finish
        // reason instead of failing on the missing key.
        return isset($candidate['finishReason'])
            ? "内容被安全过滤阻止,原因:{$candidate['finishReason']}"
            : '模型未返回任何内容';
    }

    return $text;
}
Q3:如何选择合适的模型?
回答:
| 场景 | 推荐模型 | 原因 |
|---|---|---|
| 快速响应 | gemini-1.5-flash | 速度快,成本低 |
| 复杂推理 | gemini-1.5-pro | 能力强 |
| 图像处理 | gemini-1.5-flash | 多模态支持好 |
| 长文本处理 | gemini-1.5-pro | 最高2M上下文 |
Q4:如何处理超长文本?
回答:
php
<?php
// Use gemini-1.5-pro for long texts
$longDocument = file_get_contents('large_document.txt');
$result = $client->generateContentAdvanced(
"请总结以下文档:\n\n{$longDocument}",
'gemini-1.5-pro', // long-context model
['maxOutputTokens' => 2048]
);Q5:如何实现批量处理?
回答:
php
<?php
/**
 * Send several prompts one after another, pausing between requests.
 *
 * @param GeminiClient $client            Client used for every request.
 * @param array        $prompts           Prompts to send; keys are preserved in the result.
 * @param int          $delayMicroseconds Pause between two consecutive requests. The default
 *                                        of 100 ms matches the previous fixed pause; raise it
 *                                        to stay under the rate limit of the model in use.
 *
 * @return array Decoded responses indexed by the keys of $prompts.
 */
function batchProcess(GeminiClient $client, array $prompts, int $delayMicroseconds = 100000): array
{
    $results = [];
    $remaining = count($prompts);
    foreach ($prompts as $key => $prompt) {
        $results[$key] = $client->generateContent($prompt);
        $remaining--;
        // Pause only between requests; there is nothing to wait for after the last one.
        if ($remaining > 0 && $delayMicroseconds > 0) {
            usleep($delayMicroseconds);
        }
    }

    return $results;
}
Q6:如何处理API错误?
回答:
php
<?php
/**
 * Translate an exception raised for a Gemini call into a user-facing message.
 *
 * The exception message is searched for known API status markers, in the
 * order listed below; the first marker found decides the result.
 *
 * @param Exception $e Exception whose message is inspected.
 *
 * @return string Message for the matched marker, or a generic fallback.
 */
function handleGeminiError(Exception $e): string
{
    $knownErrors = [
        'API_KEY_INVALID' => 'API Key无效',
        'RESOURCE_EXHAUSTED' => '请求配额已用尽',
        'INVALID_ARGUMENT' => '请求参数错误',
    ];

    $text = $e->getMessage();
    foreach ($knownErrors as $marker => $friendly) {
        if (strpos($text, $marker) !== false) {
            return $friendly;
        }
    }

    return '服务暂时不可用';
}
实战练习
基础练习
练习1:编写一个简单的Gemini聊天程序。
参考代码:
php
<?php
// Interactive console chat: read one line per turn until the user types "quit".
$apiKey = getenv('GOOGLE_API_KEY');
$client = new GeminiClient($apiKey);
echo "Gemini聊天助手 (输入 'quit' 退出)\n";
while (true) {
    echo "\n你: ";
    $line = fgets(STDIN);
    if ($line === false) {
        // fgets() returns false once STDIN is closed (Ctrl-D or end of a pipe).
        // Without this check the loop would spin forever sending empty prompts.
        break;
    }
    $input = trim($line);
    if ($input === 'quit') {
        break;
    }
    if ($input === '') {
        // Nothing typed: prompt again instead of spending a request.
        continue;
    }
    $result = $client->generateContent($input);
    // A blocked or empty reply has no text part; print an empty answer instead of a warning.
    echo "Gemini: " . ($result['candidates'][0]['content']['parts'][0]['text'] ?? '') . "\n";
}
进阶练习
练习2:实现一个图像描述生成器。
参考代码:
php
<?php
/**
 * Caption and alt-text prompts built on GeminiClient::analyzeImage().
 */
class ImageCaptionGenerator
{
    private GeminiClient $client;

    /**
     * @param GeminiClient $client Client used for all image requests.
     */
    public function __construct(GeminiClient $client)
    {
        // Without this assignment the typed property stays uninitialised and
        // both methods fail on first access.
        $this->client = $client;
    }

    /**
     * Ask for a caption of the image in the given style.
     */
    public function generateCaption(string $imagePath, string $style = '简洁'): string
    {
        return $this->client->analyzeImage(
            $imagePath,
            "请为这张图片生成一个{$style}的描述文案:"
        );
    }

    /**
     * Ask for a short description suitable for an HTML alt attribute.
     */
    public function generateAltText(string $imagePath): string
    {
        return $this->client->analyzeImage(
            $imagePath,
            "请为这张图片生成适合作为alt属性的简短描述:"
        );
    }
}
挑战练习
练习3:构建一个多模态内容分析系统。
参考代码:
php
<?php
/**
 * Analyse a text and, optionally, an accompanying image.
 */
class MultimodalAnalyzer
{
    private GeminiClient $client;

    /**
     * @param GeminiClient $client Client used for all requests.
     */
    public function __construct(GeminiClient $client)
    {
        // Without this assignment the typed property stays uninitialised.
        $this->client = $client;
    }

    /**
     * Analyse the text and, when an existing image file is given, the image
     * and its consistency with the text.
     *
     * @param string      $text      Text to analyse.
     * @param string|null $imagePath Optional image path; ignored when null or not an existing file.
     *
     * @return array Always has `text_analysis`; `image_analysis` and `consistency`
     *               are added when the image was analysed.
     *
     * @throws RuntimeException When a reply that should be JSON cannot be decoded.
     */
    public function analyzeContent(string $text, ?string $imagePath = null): array
    {
        $analysis = [
            'text_analysis' => $this->analyzeText($text),
        ];

        if ($imagePath !== null && $imagePath !== '' && file_exists($imagePath)) {
            $analysis['image_analysis'] = $this->analyzeImage($imagePath);
            $analysis['consistency'] = $this->checkConsistency($text, $imagePath);
        }

        return $analysis;
    }

    /**
     * Ask for sentiment, topic and key information of the text as JSON.
     */
    private function analyzeText(string $text): array
    {
        // The reply is decoded as JSON, so the prompt has to request JSON explicitly.
        $prompt = "请分析以下文本的情感、主题和关键信息,以JSON格式返回:\n\n{$text}";
        $result = $this->client->generateContent($prompt);
        $reply = $result['candidates'][0]['content']['parts'][0]['text'] ?? null;
        if (!is_string($reply)) {
            throw new RuntimeException('Gemini response contained no text');
        }

        return $this->decodeJson($reply);
    }

    /**
     * Ask for content, sentiment and topic of the image as JSON.
     */
    private function analyzeImage(string $imagePath): array
    {
        $reply = $this->client->analyzeImage(
            $imagePath,
            '请分析这张图片的内容、情感和主题,以JSON格式返回。'
        );

        return $this->decodeJson($reply);
    }

    /**
     * Ask whether the image matches the text; the free-text answer is returned
     * under the `analysis` key.
     */
    private function checkConsistency(string $text, string $imagePath): array
    {
        $reply = $this->client->analyzeImage(
            $imagePath,
            "请检查图片内容与以下文本是否一致,并说明原因:\n\n{$text}"
        );

        return ['analysis' => $reply];
    }

    /**
     * Decode a JSON reply, tolerating a surrounding Markdown code fence.
     */
    private function decodeJson(string $text): array
    {
        $clean = preg_replace('/^\s*```(?:json)?\s*|\s*```\s*$/i', '', $text);
        $decoded = json_decode(trim((string) $clean), true);
        if (!is_array($decoded)) {
            throw new RuntimeException('Gemini reply is not valid JSON: ' . json_last_error_msg());
        }

        return $decoded;
    }
}
知识点总结
核心要点
- 多模态能力:原生支持文本、图像、音频、视频
- 长上下文:最高支持2M Token上下文
- 免费额度:慷慨的免费API调用额度
- 安全设置:灵活的内容安全过滤机制
- 模型选择:根据任务选择合适的模型
易错点回顾
| 易错点 | 正确做法 |
|---|---|
| 忽略安全过滤 | 检查promptFeedback和safetyRatings |
| 图像格式不支持 | 转换为支持的格式 |
| 超出Token限制 | 使用长上下文模型 |
| 忽略速率限制 | 添加请求间隔 |
拓展参考资料
官方文档:https://ai.google.dev/gemini-api/docs
进阶学习路径
- 本知识点 → Gemini API基础使用
- 下一步 → xAI Grok API
- 进阶 → 流式响应处理
- 高级 → 并发与限流
💡 记住:Gemini的多模态能力和长上下文处理能力是其核心优势,善用这些特性可以构建更强大的AI应用。
