API
- Model Information
- Model List
- Using OpenAI library
- Openai Responses API Support
- GPT-4o Image Generation
- Fill In the Middle
- OpenAI Codex CLI Integration
- How to Use Non-OpenAI Models on Our Platform
- LLM Search
- General Model Forwarding
- Gemini Guides
- Claude Native API Integration
- Claude Text Editor Tool
- Claude Prompt Caching
- New Chapter in Multimodal Interaction
- Ideogram AI Drawing Support
- Jina AI Integration
Terms and Privacy
API
New Chapter in Multimodal Interaction
API
New Chapter in Multimodal Interaction
Adding gemini-2.0-flash-exp Model Support
Instructions:
- You must add the following parameter to enable the new feature:
"modalities":["text","image"]
- Images are passed and output in Base64 encoding
- As this is an experimental model, it is recommended to explicitly request an "output image" in your prompt; otherwise the model may return text only
- Default height for output images is 1024px
- Python calls require the latest OpenAI SDK; run
pip install -U openai
first. For more information, visit the official Gemini documentation
Input Reference Structure:
"modalities": ["text","image"]
{
"model": "gemini-2.0-flash-exp",
"messages": [
{
"role": "user",
"content": "Generate a landscape painting and provide a poem to describe it"
}
],
"modalities":["text","image"], //need to add image
"temperature": 0.7
}
Output Reference Structure:
"choices":
[
{
"index": 0,
"message":
{
"role": "assistant",
"content": "Hello! How can I assist you today?",
"refusal": null,
"multi_mod_content": //📍 New addition
[
{
"text": "",
"inlineData":
{
"data":"base64 str",
"mimeType":"png"
}
},
{
"text": "hello",
"inlineData":
{
}
}
],
"annotations":
[]
},
"logprobs": null,
"finish_reason": "stop"
}
],
Text-to-Image Generation
Input: text Output: text + image
"""Text-to-image example: send a text prompt to gemini-2.0-flash-exp and
save the image returned in the multimodal response.

Requires the latest OpenAI SDK (`pip install -U openai`) and Pillow.
"""
import os
import base64
from io import BytesIO

from openai import OpenAI
from PIL import Image

client = OpenAI(
    api_key="AIHUBMIX_API_KEY",  # Replace with the key you generated in AiHubMix
    base_url="https://api.aihubmix.com/v1",
)

# Text-only input; `modalities` must include "image" or the model may
# respond with text only.
response = client.chat.completions.create(
    model="gemini-2.0-flash-exp",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "generate an adorable mermaid in the sea, bold outline, chibi cartoon, in the style of Children coloring book, B&W",
                }
            ],
        },
    ],
    modalities=["text", "image"],
    temperature=0.7,
)

try:
    # Print basic response information
    print(f"Creation time: {response.created}")
    print(f"Token usage: {response.usage.total_tokens}")

    # `multi_mod_content` is a gateway-specific extension field; check for
    # it defensively rather than assuming it is present.
    if (
        hasattr(response.choices[0].message, "multi_mod_content")
        and response.choices[0].message.multi_mod_content is not None
    ):
        print("\nResponse content:")
        for part in response.choices[0].message.multi_mod_content:
            if "text" in part and part["text"] is not None:
                print(part["text"])
            # Process image content (delivered as Base64-encoded bytes)
            elif "inline_data" in part and part["inline_data"] is not None:
                print("\n🖼️ [Image content received]")
                image_data = base64.b64decode(part["inline_data"]["data"])
                mime_type = part["inline_data"].get("mime_type", "image/png")
                print(f"Image type: {mime_type}")

                image = Image.open(BytesIO(image_data))
                image.show()

                # Save image next to the script
                output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output")
                os.makedirs(output_dir, exist_ok=True)
                output_path = os.path.join(output_dir, "generated_image.jpg")
                # The model typically returns PNG data, which may carry an
                # alpha channel or palette; JPEG supports neither, so convert
                # to RGB first to avoid "cannot write mode RGBA as JPEG".
                if image.mode in ("RGBA", "LA", "P"):
                    image = image.convert("RGB")
                image.save(output_path)
                print(f"✅ Image saved to: {output_path}")
    else:
        print("No valid multimodal response received, check response structure")
except Exception as e:
    print(f"Error processing response: {str(e)}")
Output Example:
Edit Image
Input: text + image
Output: text + image
"""Image-editing example: send text + a reference image to
gemini-2.0-flash-exp and save the edited image from the multimodal response.

Requires the latest OpenAI SDK (`pip install -U openai`) and Pillow.
"""
import os
import base64
from io import BytesIO

from openai import OpenAI
from PIL import Image

client = OpenAI(
    api_key="AIHUBMIX_API_KEY",  # Replace with the key you generated in AiHubMix
    base_url="https://api.aihubmix.com/v1",
)

# Reference image shipped alongside the script; fail early with a clear
# message if it is missing.
image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "filled.jpg")
if not os.path.exists(image_path):
    raise FileNotFoundError(f"image {image_path} not exists")


def encode_image(image_path):
    """Return the file at *image_path* as a Base64-encoded UTF-8 string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


base64_image = encode_image(image_path)

# Text + image input; `modalities` must include "image" or the model may
# respond with text only.
response = client.chat.completions.create(
    model="gemini-2.0-flash-exp",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "describe the image with a concise and engaging paragraph, then fill color as children's crayon style",
                },
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        },
    ],
    modalities=["text", "image"],
    temperature=0.7,
)

try:
    # Print basic response information without base64 data
    print(f"Creation time: {response.created}")
    print(f"Token usage: {response.usage.total_tokens}")

    # `multi_mod_content` is a gateway-specific extension field; check for
    # it defensively rather than assuming it is present.
    if (
        hasattr(response.choices[0].message, "multi_mod_content")
        and response.choices[0].message.multi_mod_content is not None
    ):
        print("\nResponse content:")
        for part in response.choices[0].message.multi_mod_content:
            if "text" in part and part["text"] is not None:
                print(part["text"])
            # Process image content (delivered as Base64-encoded bytes)
            elif "inline_data" in part and part["inline_data"] is not None:
                print("\n🖼️ [Image content received]")
                image_data = base64.b64decode(part["inline_data"]["data"])
                mime_type = part["inline_data"].get("mime_type", "image/png")
                print(f"Image type: {mime_type}")

                image = Image.open(BytesIO(image_data))
                image.show()

                # Save image next to the input image
                output_dir = os.path.join(os.path.dirname(image_path), "output")
                os.makedirs(output_dir, exist_ok=True)
                output_path = os.path.join(output_dir, "edited_image.jpg")
                # The model typically returns PNG data, which may carry an
                # alpha channel or palette; JPEG supports neither, so convert
                # to RGB first to avoid "cannot write mode RGBA as JPEG".
                if image.mode in ("RGBA", "LA", "P"):
                    image = image.convert("RGB")
                image.save(output_path)
                print(f"✅ Image saved to: {output_path}")
    else:
        print("No valid multimodal response received, check response structure")
except Exception as e:
    print(f"Error processing response: {str(e)}")
Output Example:
More Examples
- TypeScript
# Coming soon
- curl
# Coming soon