# API Reference
OMM exposes three compatible API protocols on a single server. Drop in any existing tool or SDK that uses the Ollama, OpenAI, or Anthropic APIs without code changes.
# Starting the Server
Terminal
bash Copy
# Start on default port 11434
omm serve
# Custom host and port
omm serve --host 0.0.0.0 --port 8080
# With authentication
omm serve --api-key sk-your-secret-key
# Ollama-Compatible API
Full compatibility with the Ollama REST API. Use any Ollama client library or tool with OMM as the backend.
# Generate
POST /api/generate
Request
json Copy
{
"model": "llama3.2",
"prompt": "Explain quantum computing in one paragraph",
"stream": true,
"options": {
"temperature": 0.7,
"top_p": 0.9,
"num_predict": 256
}
}
# Chat
POST /api/chat
Request
json Copy
{
"model": "llama3.2",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"}
],
"stream": true
}
# List Models
GET /api/tags
Response
json Copy
{
"models": [
{
"name": "llama3.2:latest",
"size": 2019393189,
"modified_at": "2026-04-28T10:00:00Z"
}
]
}
# Pull Model
POST /api/pull
Request
json Copy
{
"name": "qwen2.5:7b",
"stream": true
}
# OpenAI-Compatible API
Full compatibility with the OpenAI Chat Completions API. Use the OpenAI Python or Node SDK with a base URL change.
# Chat Completions
POST /v1/chat/completions
Python SDK
python Copy
from openai import OpenAI
client = OpenAI(
base_url="http://localhost:11434/v1",
api_key="omm" # any non-empty string works without auth
)
response = client.chat.completions.create(
model="llama3.2",
messages=[
{"role": "user", "content": "Write a haiku about coding"}
],
stream=True
)
for chunk in response:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="")
# Models Endpoint
GET /v1/models
Response
json Copy
{
"data": [
{
"id": "llama3.2",
"object": "model",
"owned_by": "local"
}
]
}
# Anthropic-Compatible API
Compatibility with the Anthropic Messages API. Use the Anthropic Python or Node SDK with a base URL change.
# Messages
POST /anthropic/v1/messages
Python SDK
python Copy
import anthropic
client = anthropic.Anthropic(
base_url="http://localhost:11434/anthropic",
api_key="omm"
)
message = client.messages.create(
model="llama3.2",
max_tokens=1024,
messages=[
{"role": "user", "content": "Explain recursion in simple terms"}
]
)
print(message.content[0].text)
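# Authentication
When started with --api-key, OMM requires the API key on all requests. The Ollama and OpenAI endpoints take it in an "Authorization: Bearer" header; the Anthropic endpoints take it in an "x-api-key" header: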
Terminal
bash Copy
# Ollama API
curl -H "Authorization: Bearer sk-your-secret-key" \
http://localhost:11434/api/tags
# OpenAI API
curl -H "Authorization: Bearer sk-your-secret-key" \
http://localhost:11434/v1/models
# Anthropic API
curl -H "x-api-key: sk-your-secret-key" \
http://localhost:11434/anthropic/v1/messages
# Streaming
All three APIs support Server-Sent Events (SSE) streaming when "stream": true is set in the request. Each chunk contains a partial response. The stream ends with a [DONE] message (OpenAI format) or a final JSON object with "done": true (Ollama format).
# Model Parameters
Pass generation parameters per-request via the options or parameters object:
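| Parameter      | Ollama-style key | OpenAI-style key  | Default  |
|----------------|------------------|-------------------|----------|
| Temperature    | temperature      | temperature       | 0.7      |
| Top P          | top_p            | top_p             | 0.9      |
| Top K          | top_k            | —                 | 40       |
| Max tokens     | num_predict      | max_tokens        | 4096     |
| Repeat penalty | repeat_penalty   | frequency_penalty | 1.1      |
| Stop sequences | stop             | stop              | []       |
| Seed           | seed             | seed              | (random) |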