"use strict";
// Compiled CommonJS module. The sourceMappingURL footer suggests this was
// emitted by the TypeScript compiler from a `groq.ts` source — if a build
// step exists, edit the source file rather than this output (TODO confirm).
Object.defineProperty(exports, "__esModule", { value: true });
exports.groqDocs = void 0;
/**
 * Static reference documentation entries for the Groq API.
 *
 * Each entry in the array has the shape:
 *   id       - unique slug identifying the doc entry
 *   title    - human-readable title
 *   category - topical grouping ("chat" | "limits" | "tools" in this file)
 *   provider - always "groq" in this file
 *   keywords - lookup/search terms associated with the entry
 *   content  - full markdown body (endpoint, schemas, examples) as a
 *              template-literal string; backticks inside are escaped (\`)
 *
 * NOTE(review): the model names, rate limits, context windows, and per-token
 * prices embedded in `content` are point-in-time data snapshots — verify
 * against Groq's current documentation before relying on them.
 */
exports.groqDocs = [
// Chat completions endpoint: request/response schema, model list,
// Python/Node SDK examples (via the OpenAI-compatible client), and cURL.
{
id: "groq-chat",
title: "Groq Chat API",
category: "chat",
provider: "groq",
keywords: ["groq", "chat", "completions", "fast", "inference", "llm"],
content: `# Groq Chat API
Groq uses OpenAI-compatible API format with ultra-fast inference.
## Endpoint
POST https://api.groq.com/openai/v1/chat/completions
## Headers
- Authorization: Bearer YOUR_API_KEY
- Content-Type: application/json
## Request Body
\`\`\`json
{
"model": "llama-3.3-70b-versatile" | "llama-3.1-8b-instant" | "llama-3.2-90b-vision-preview" | "llama-guard-3-8b" | "mixtral-8x7b-32768" | "gemma2-9b-it" | "deepseek-r1-distill-llama-70b",
"messages": [
{
"role": "system" | "user" | "assistant" | "tool",
"content": "string"
}
],
"temperature": number (0-2, default 1),
"max_tokens": number,
"top_p": number (0-1),
"stream": boolean,
"stop": string | string[],
"tools": [
{
"type": "function",
"function": {
"name": "string",
"description": "string",
"parameters": {JSON Schema}
}
}
],
"tool_choice": "auto" | "none" | "required" | {"type": "function", "function": {"name": "string"}},
"response_format": {"type": "text" | "json_object"},
"seed": number
}
\`\`\`
## Response
Same format as OpenAI:
\`\`\`json
{
"id": "chatcmpl-xxx",
"object": "chat.completion",
"created": 1234567890,
"model": "llama-3.3-70b-versatile",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "string",
"tool_calls": [...]
},
"finish_reason": "stop" | "length" | "tool_calls"
}
],
"x_groq": {
"id": "req_xxx",
"usage": {
"queue_time": 0.01,
"prompt_tokens": 10,
"prompt_time": 0.001,
"completion_tokens": 50,
"completion_time": 0.05,
"total_tokens": 60,
"total_time": 0.051
}
}
}
\`\`\`
## Available Models
- llama-3.3-70b-versatile: Latest 70B, best quality
- llama-3.1-8b-instant: Fast 8B model
- llama-3.2-90b-vision-preview: Vision capabilities
- mixtral-8x7b-32768: MoE model, 32K context
- gemma2-9b-it: Google's Gemma 2
- deepseek-r1-distill-llama-70b: DeepSeek reasoning
## Python SDK Example
\`\`\`python
from openai import OpenAI
client = OpenAI(
base_url="https://api.groq.com/openai/v1",
api_key="gsk_..."
)
response = client.chat.completions.create(
model="llama-3.3-70b-versatile",
messages=[
{"role": "user", "content": "Hello!"}
],
temperature=0.7,
max_tokens=1024
)
print(response.choices[0].message.content)
\`\`\`
## Node.js Example
\`\`\`javascript
import OpenAI from "openai";
const client = new OpenAI({
baseURL: "https://api.groq.com/openai/v1",
apiKey: "gsk_..."
});
const response = await client.chat.completions.create({
model: "llama-3.3-70b-versatile",
messages: [{ role: "user", content: "Hello!" }],
max_tokens: 1024
});
console.log(response.choices[0].message.content);
\`\`\`
## cURL Example
\`\`\`bash
curl https://api.groq.com/openai/v1/chat/completions \\
-H "Authorization: Bearer $GROQ_API_KEY" \\
-H "Content-Type: application/json" \\
-d '{
"model": "llama-3.3-70b-versatile",
"messages": [{"role": "user", "content": "Hello!"}],
"temperature": 0.7
}'
\`\`\``
},
// Rate limits, pricing, context windows, key features, and HTTP error codes.
{
id: "groq-rate-limits",
title: "Groq Rate Limits & Pricing",
category: "limits",
provider: "groq",
keywords: ["groq", "rate limit", "pricing", "cost", "free", "limits"],
content: `# Groq Rate Limits & Pricing
## Free Tier Rate Limits
- llama-3.3-70b: 30 RPM, 14,400 RPD, 6,000 TPM
- llama-3.1-8b: 30 RPM, 14,400 RPD, 6,000 TPM
- mixtral-8x7b: 30 RPM, 14,400 RPD, 6,000 TPM
- gemma2-9b: 30 RPM, 14,400 RPD, 6,000 TPM
## Paid Tier
Higher limits available with billing.
## Pricing (per 1M tokens)
- llama-3.3-70b: Input $0.59, Output $0.79
- llama-3.1-8b: Input $0.05, Output $0.08
- mixtral-8x7b: Input $0.24, Output $0.24
- gemma2-9b: Input $0.20, Output $0.20
- llama-3.2-90b-vision: Input $0.90, Output $0.90
## Key Features
- Ultra-fast inference (LPUs)
- OpenAI-compatible API
- Free tier available
- No context window limits beyond model specs
## Context Windows
- llama-3.3-70b: 128K tokens
- llama-3.1-8b: 128K tokens
- mixtral-8x7b: 32K tokens
- gemma2-9b: 8K tokens
## Error Codes
- 400: Bad Request
- 401: Invalid API key
- 429: Rate limit exceeded (includes retry-after header)
- 500: Internal error`
},
// Function/tool calling: supported models, tool JSON schema, and a Python
// example showing the full tool-call round trip (assistant -> tool -> model).
{
id: "groq-function-calling",
title: "Groq Function Calling",
category: "tools",
provider: "groq",
keywords: ["groq", "function", "tools", "tool calling", "function calling"],
content: `# Groq Function Calling
Groq supports function calling with OpenAI-compatible format.
## Supported Models
- llama-3.3-70b-versatile
- llama-3.1-8b-instant
- mixtral-8x7b-32768
## Tool Definition
\`\`\`json
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather for location",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string"},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
},
"required": ["location"]
}
}
}
\`\`\`
## Python Example
\`\`\`python
from openai import OpenAI
client = OpenAI(
base_url="https://api.groq.com/openai/v1",
api_key="gsk_..."
)
tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string"}
},
"required": ["location"]
}
}
}
]
response = client.chat.completions.create(
model="llama-3.3-70b-versatile",
messages=[{"role": "user", "content": "Weather in Tokyo?"}],
tools=tools
)
# Handle tool calls same as OpenAI
if response.choices[0].message.tool_calls:
for tc in response.choices[0].message.tool_calls:
result = execute_tool(tc.function.name, tc.function.arguments)
response2 = client.chat.completions.create(
model="llama-3.3-70b-versatile",
messages=[
{"role": "user", "content": "Weather in Tokyo?"},
{"role": "assistant", "content": None, "tool_calls": [tc]},
{"role": "tool", "tool_call_id": tc.id, "content": str(result)}
],
tools=tools
)
\`\`\``
}
];
//# sourceMappingURL=groq.js.map