import time
from openai import OpenAI
# Client for the OpenAI-compatible endpoint served on the local machine.
client = OpenAI(
    base_url="http://127.0.0.1:22002/v1",
    # Placeholder key; presumably the local server does not validate it
    # (TODO confirm against the server's auth settings).
    api_key="EMPTY",
    # Generous request timeout; assumed to be in seconds (one hour) to
    # leave room for slow video requests. Confirm against the openai client docs.
    timeout=3600,
)
# A single user turn made of two content parts: the video to inspect,
# followed by the text question about it.
video_part = {
    "type": "video_url",
    "video_url": {
        "url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/space_woaudio.mp4",
    },
}
question_part = {
    "type": "text",
    "text": "How long is this video?",
}
messages = [
    {
        "role": "user",
        "content": [video_part, question_part],
    },
]
# Start the clock before the request is issued so the elapsed time printed
# at the end covers both the request and the full streamed reply.
# perf_counter() is monotonic, so the measurement is unaffected by system
# clock adjustments.
start = time.perf_counter()
response = client.chat.completions.create(
    model="Qwen/Qwen3-VL-235B-A22B-Instruct-FP8",
    messages=messages,
    max_tokens=2048,
    stream=True,
    # Configure video frame sampling (vLLM only)
    extra_body={"mm_processor_kwargs": {"fps": 2, "do_sample_frames": True}},
)

# Print the reply incrementally as chunks arrive.
for chunk in response:
    # A stream chunk may carry an empty `choices` list (e.g. a usage-only
    # chunk); indexing [0] on it would raise IndexError, so skip it.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    # Chunks without text content have `content` set to None.
    if delta_text is not None:
        print(delta_text, end="", flush=True)

print(f"\n\nResponse costs: {time.perf_counter() - start:.2f}s")