There is a "Bad request: Invalid response_format provided." error after 567 seconds. What is strange is that the same schema worked before, so the JSON schema itself should be valid.
import time

from openai import AzureOpenAI


async def test_openai_2():
    config = await load_config()
    client = AzureOpenAI(
        azure_endpoint=config.azure_openai_endpoint_us,
        api_key=config.azure_openai_api_key_us.get_secret_value(),
        api_version="2025-03-01-preview",
    )
    start_time = time.time()
    try:
        # Non-streaming structured-output request; fails with the 400 after ~567 s.
        response = client.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": "",
                },
            ],
            response_format=SomePydanticClass,
        )
        print(response)
    finally:
        print("Process finished --- %s seconds ---" % (time.time() - start_time))
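For reference, a quick way to sanity-check the schema the SDK derives from the model before the request is sent (a debugging sketch, assuming SomePydanticClass is the Pydantic model passed as response_format above). The failing run's output follows below.

import json

# Dump the JSON schema Pydantic generates, to get a feel for its size and
# nesting before blaming the service.
schema = SomePydanticClass.model_json_schema()
raw = json.dumps(schema)
print(f"schema size: {len(raw)} bytes")
print(f"top-level properties: {len(schema.get('properties', {}))}")
print(f"nested models ($defs): {len(schema.get('$defs', {}))}")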
/app/tests/integration/usage/test_custom_callback.py::test_openai_2 failed: async def test_openai_2():
config = await load_config()
client = AzureOpenAI(
azure_endpoint=config.azure_openai_endpoint_us,
api_key=config.azure_openai_api_key_us.get_secret_value(),
api_version="2025-03-01-preview",
)
start_time = time.time()
try:
> response = client.beta.chat.completions.parse(
model="gpt-4o",
messages=[
{
"role": "system",
"content": "",
},
],
response_format=CollectiveInsurance,
)
tests/integration/usage/test_custom_callback.py:292:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/local/lib/python3.10/site-packages/openai/resources/beta/chat/completions.py:156: in parse
return self._post(
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:1280: in post
return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:957: in request
return self._request(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <openai.lib.azure.AzureOpenAI object at 0x7f62df48f7f0>
def _request(
self,
*,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
retries_taken: int,
stream: bool,
stream_cls: type[_StreamT] | None,
) -> ResponseT | _StreamT:
# create a copy of the options we were given so that if the
# options are mutated later & we then retry, the retries are
# given the original options
input_options = model_copy(options)
cast_to = self._maybe_override_cast_to(cast_to, options)
options = self._prepare_options(options)
remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
request = self._build_request(options, retries_taken=retries_taken)
self._prepare_request(request)
kwargs: HttpxSendArgs = {}
if self.custom_auth is not None:
kwargs["auth"] = self.custom_auth
log.debug("Sending HTTP Request: %s %s", request.method, request.url)
try:
response = self._client.send(
request,
stream=stream or self._should_stream_response_body(request=request),
**kwargs,
)
except httpx.TimeoutException as err:
log.debug("Encountered httpx.TimeoutException", exc_info=True)
if remaining_retries > 0:
return self._retry_request(
input_options,
cast_to,
retries_taken=retries_taken,
stream=stream,
stream_cls=stream_cls,
response_headers=None,
)
log.debug("Raising timeout error")
raise APITimeoutError(request=request) from err
except Exception as err:
log.debug("Encountered Exception", exc_info=True)
if remaining_retries > 0:
return self._retry_request(
input_options,
cast_to,
retries_taken=retries_taken,
stream=stream,
stream_cls=stream_cls,
response_headers=None,
)
log.debug("Raising connection error")
raise APIConnectionError(request=request) from err
log.debug(
'HTTP Response: %s %s "%i %s" %s',
request.method,
request.url,
response.status_code,
response.reason_phrase,
response.headers,
)
log.debug("request_id: %s", response.headers.get("x-request-id"))
try:
response.raise_for_status()
except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
if remaining_retries > 0 and self._should_retry(err.response):
err.response.close()
return self._retry_request(
input_options,
cast_to,
retries_taken=retries_taken,
response_headers=err.response.headers,
stream=stream,
stream_cls=stream_cls,
)
# If the response is streamed then we need to explicitly read the response
# to completion before attempting to access the response text.
if not err.response.is_closed:
err.response.read()
log.debug("Re-raising status error")
> raise self._make_status_error_from_response(err.response) from None
E openai.BadRequestError: Error code: 400 - {'error': {'message': 'Bad request: Invalid response_format provided.', 'type': 'invalid_request_error', 'param': None, 'code': None}}
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:1061: BadRequestError
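Worth noting: the SDK retries failed requests by default, so part of the 567 seconds may be retried attempts rather than a single long request. A sketch, with hypothetical timeout values, to take retries out of the equation:

import httpx
from openai import AzureOpenAI

# Disable the SDK's automatic retries and set an explicit, generous read
# timeout, so any 400/408 that comes back is unambiguously the result of a
# single request to the service.
client = AzureOpenAI(
    azure_endpoint=config.azure_openai_endpoint_us,
    api_key=config.azure_openai_api_key_us.get_secret_value(),
    api_version="2025-03-01-preview",
    timeout=httpx.Timeout(600.0, connect=10.0),
    max_retries=0,
)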
In streaming mode, there is a timeout error after about 241 seconds.
async def test_openai():
    config = await load_config()
    client = AzureOpenAI(
        azure_endpoint=config.azure_openai_endpoint_us,
        api_key=config.azure_openai_api_key_us.get_secret_value(),
        api_version="2025-03-01-preview",
    )
    start_time = time.time()
    try:
        # Streaming variant of the same request; fails with a 408 after ~241 s.
        with client.beta.chat.completions.stream(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": "",
                },
            ],
            response_format=SomeLargePydanticClass,
        ) as stream:
            for event in stream:
                print("\n\n[EVENT RECEIVED]--- %s seconds ---" % (time.time() - start_time))
                print(event)
    finally:
        print("Process finished --- %s seconds ---" % (time.time() - start_time))
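A possible way to narrow down the Invalid response_format error (a hypothetical variant, not one of the original tests) is to bypass the beta helper and pass an explicit json_schema payload, so exactly what the service receives is under your control:

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "system", "content": ""}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "CollectiveInsurance",
            # Raw Pydantic schema. Note that strict mode ("strict": True)
            # additionally requires additionalProperties: false and every
            # property to be required -- transformations the parse() helper
            # normally applies for you.
            "schema": CollectiveInsurance.model_json_schema(),
        },
    },
)

If this plain request succeeds where parse() fails, the strict-schema transformation is the likely culprit; if it fails the same way, the service is rejecting the schema itself.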
Running pytest with args: ['-p', 'vscode_pytest', '-s', '--rootdir=/app', '/app/tests/integration/usage/test_custom_callback.py::test_openai']
============================= test session starts ==============================
platform linux -- Python 3.10.17, pytest-8.3.4, pluggy-1.5.0
rootdir: /app
configfile: pyproject.toml
plugins: anyio-4.9.0, asyncio-0.25.1
asyncio: mode=auto, asyncio_default_fixture_loop_scope=function
collected 1 item
tests/integration/usage/test_custom_callback.py F
=================================== FAILURES ===================================
_________________________________ test_openai __________________________________
async def test_openai():
config = await load_config()
client = AzureOpenAI(
azure_endpoint=config.azure_openai_endpoint_us,
api_key=config.azure_openai_api_key_us.get_secret_value(),
api_version="2025-03-01-preview",
)
start_time = time.time()
try:
> with client.beta.chat.completions.stream(
model="gpt-4o",
messages=[
{
"role": "system",
"content": "",
},
],
response_format=CollectiveInsurance,
) as stream:
tests/integration/usage/test_custom_callback.py:318:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/local/lib/python3.10/site-packages/openai/lib/streaming/chat/_completions.py:148: in __enter__
raw_stream = self.__api_request()
/usr/local/lib/python3.10/site-packages/openai/_utils/_utils.py:275: in wrapper
return func(*args, **kwargs)
/usr/local/lib/python3.10/site-packages/openai/resources/chat/completions.py:829: in create
return self._post(
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:1280: in post
return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:957: in request
return self._request(
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:1046: in _request
return self._retry_request(
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:1095: in _retry_request
return self._request(
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:1046: in _request
return self._retry_request(
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:1095: in _retry_request
return self._request(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <openai.lib.azure.AzureOpenAI object at 0x7f3f322c7760>
    def _request(self, *, cast_to, options, retries_taken, stream, stream_cls):
        # ... same body as the _request source shown in the first traceback above ...
> raise self._make_status_error_from_response(err.response) from None
E openai.APIStatusError: Error code: 408 - {'error': {'code': 'Timeout', 'message': 'The operation was timeout.'}}
/usr/local/lib/python3.10/site-packages/openai/_base_client.py:1061: APIStatusError
(Why is the error different from the one in non-streaming mode?)
When making the request after commenting out one of the fields, it takes 145 seconds to receive the first event. When making a request with just that field's class as the response format, it takes 116 seconds to receive the message. Could there be a timeout on first-token generation?
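One way to test that hypothesis is to measure the time-to-first-event for a given response_format (a hypothetical helper, reusing the client from the tests above):

import time

# Stream a request with the given response_format and return how long the
# first event takes to arrive, to compare against the ~145 s / ~116 s
# numbers above.
def time_to_first_event(response_format) -> float:
    start = time.time()
    with client.beta.chat.completions.stream(
        model="gpt-4o",
        messages=[{"role": "system", "content": ""}],
        response_format=response_format,
    ) as stream:
        for _ in stream:
            return time.time() - start
    return time.time() - start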
Does generating the CFG (context-free grammar) for the JSON schema / structured output take a long time? Shouldn't it already be cached, since we made successful requests with this schema before?
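If the compiled grammar were cached server-side, a second identical request should reach its first event much faster. A probe using the hypothetical helper above:

# Compare first-event latency for two identical back-to-back requests. A
# large gap between the two runs would point to per-request grammar/CFG
# compilation rather than a cached artifact.
first = time_to_first_event(CollectiveInsurance)
second = time_to_first_event(CollectiveInsurance)
print(f"first call:  {first:.1f}s to first event")
print(f"second call: {second:.1f}s to first event")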