Run Eval
Run the eval with the provided responses.
Args:
- eval_run_data (EvalRunRequest): Data for the eval run, including responses.
- workspace_uuid (str, optional): UUID of the workspace. Defaults to None.
- is_sandbox (bool, optional): Whether to run in sandbox mode. Defaults to False.
Returns: EvalRunResult: The result of the eval run after scoring the responses.
Raises: AymaraAPIError: If the organization is missing or the request is invalid.
Example: POST /api/eval-runs/-/score-responses { "eval_uuid": "...", "responses": [...] }
Query parameters
- workspace_uuid
  string
- is_sandbox
  boolean. Default value is false.
Body
Required
- eval_uuid
  string, required. Unique identifier for the eval.
- eval_run_uuid
  string | null
- name
  string | null
- ai_description
  string | null
- continue_thread
  boolean | null. Default value is false.
- eval_run_examples
  array[object] | null. Schema for examples to include with an eval run.
- responses
  array[object], required. List of AI responses to eval prompts.
  Schema for submitting AI responses to eval prompts.
Responses
-
200 application/json
OK
-
400 application/json
Bad Request
-
401 application/json
Unauthorized
-
403 application/json
Forbidden
-
404 application/json
Not Found
-
409 application/json
Conflict
-
422 application/json
Unprocessable Entity
-
429 application/json
Too Many Requests
-
500 application/json
Internal Server Error
-
503 application/json
Service Unavailable
import os
from aymara_ai import AymaraAI
client = AymaraAI(
api_key=os.environ.get("AYMARA_AI_API_KEY"), # This is the default and can be omitted
)
eval_run_result = client.evals.runs.score_responses(
eval_uuid="eval_uuid",
responses=[{
"prompt_uuid": "prompt_uuid"
}],
)
print(eval_run_result.eval_run_uuid)
curl \
--request POST 'https://api.aymara.ai/v2/eval-runs/-/score-responses' \
--header "x-api-key: $API_KEY" \
--header "Content-Type: application/json" \
--data '{"eval_uuid":"string","eval_run_uuid":"string","name":"string","ai_description":"string","continue_thread":false,"eval_run_examples":[{"example_uuid":"string","type":"pass","prompt":"string","response":"string","explanation":"string"}],"responses":[{"prompt_uuid":"string","thread_uuid":"string","turn_number":1,"continue_thread":false,"content":"string","content_type":"text","exclude_from_scoring":false,"ai_refused":false}]}'
{
"eval_uuid": "string",
"eval_run_uuid": "string",
"name": "string",
"ai_description": "string",
"continue_thread": false,
"eval_run_examples": [
{
"example_uuid": "string",
"type": "pass",
"prompt": "string",
"response": "string",
"explanation": "string"
}
],
"responses": [
{
"prompt_uuid": "string",
"thread_uuid": "string",
"turn_number": 1,
"continue_thread": false,
"content": "string",
"content_type": "text",
"exclude_from_scoring": false,
"ai_refused": false
}
]
}
{
"eval_run_uuid": "string",
"eval_uuid": "string",
"name": "string",
"status": "created",
"created_at": "2025-05-04T09:42:00Z",
"updated_at": "2025-05-04T09:42:00Z",
"evaluation": {
"eval_uuid": "string",
"name": "string",
"ai_description": "string",
"ai_instructions": "string",
"eval_type": "string",
"eval_instructions": "string",
"language": "en",
"modality": "text",
"ground_truth": "string",
"num_prompts": 100,
"prompt_examples": [
{
"content": "string",
"example_uuid": "string",
"type": "good",
"explanation": "string"
}
],
"is_jailbreak": false,
"is_sandbox": false,
"workspace_uuid": "string",
"status": "created",
"created_at": "2025-05-04T09:42:00Z",
"updated_at": "2025-05-04T09:42:00Z"
},
"ai_description": "string",
"workspace_uuid": "string",
"pass_rate": 42.0,
"num_prompts": 42,
"num_responses_scored": 42,
"responses": [
{
"prompt_uuid": "string",
"thread_uuid": "string",
"turn_number": 1,
"continue_thread": false,
"content": "string",
"content_type": "text",
"exclude_from_scoring": false,
"ai_refused": false,
"response_uuid": "string",
"explanation": "string",
"confidence": 42.0,
"is_passed": true,
"next_prompt": {
"prompt_uuid": "string",
"thread_uuid": "string",
"turn_number": 1,
"content": "string",
"category": "string"
}
}
]
}
{
"error": {
"code": "auth.invalid_key",
"message": "string",
"details": {}
},
"request_id": ""
}
{
"error": {
"code": "auth.invalid_key",
"message": "string",
"details": {}
},
"request_id": ""
}
{
"error": {
"code": "auth.invalid_key",
"message": "string",
"details": {}
},
"request_id": ""
}
{
"error": {
"code": "auth.invalid_key",
"message": "string",
"details": {}
},
"request_id": ""
}
{
"error": {
"code": "auth.invalid_key",
"message": "string",
"details": {}
},
"request_id": ""
}
{
"error": {
"code": "auth.invalid_key",
"message": "string",
"details": {}
},
"request_id": ""
}
{
"error": {
"code": "auth.invalid_key",
"message": "string",
"details": {}
},
"request_id": ""
}
{
"error": {
"code": "auth.invalid_key",
"message": "string",
"details": {}
},
"request_id": ""
}
{
"error": {
"code": "auth.invalid_key",
"message": "string",
"details": {}
},
"request_id": ""
}