POST /v1/accounts/{account_id}/reinforcementFineTuningJobs
Create Reinforcement Fine-tuning Job

Example request

curl --request POST \
  --url https://api.fireworks.ai/v1/accounts/{account_id}/reinforcementFineTuningJobs \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "dataset": "<string>",
  "evaluator": "<string>",
  "displayName": "<string>",
  "evaluationDataset": "<string>",
  "evalAutoCarveout": true,
  "trainingConfig": {
    "outputModel": "<string>",
    "baseModel": "<string>",
    "warmStartFrom": "<string>",
    "jinjaTemplate": "<string>",
    "learningRate": 123,
    "maxContextLength": 123,
    "loraRank": 123,
    "region": "REGION_UNSPECIFIED",
    "epochs": 123,
    "batchSize": 123,
    "gradientAccumulationSteps": 123,
    "learningRateWarmupSteps": 123,
    "batchSizeSamples": 123,
    "optimizerWeightDecay": 123
  },
  "wandbConfig": {
    "enabled": true,
    "apiKey": "<string>",
    "project": "<string>",
    "entity": "<string>",
    "runId": "<string>"
  },
  "awsS3Config": {
    "credentialsSecret": "<string>",
    "iamRoleArn": "<string>"
  },
  "inferenceParameters": {
    "maxOutputTokens": 123,
    "temperature": 123,
    "topP": 123,
    "responseCandidatesCount": 123,
    "extraBody": "<string>",
    "topK": 123
  },
  "chunkSize": 123,
  "mcpServer": "<string>",
  "nodeCount": 123,
  "lossConfig": {
    "method": "METHOD_UNSPECIFIED",
    "klBeta": 123
  },
  "maxConcurrentRollouts": 123,
  "maxConcurrentEvaluations": 123
}
'

Example response

{
  "dataset": "<string>",
  "evaluator": "<string>",
  "name": "<string>",
  "displayName": "<string>",
  "createTime": "2023-11-07T05:31:56Z",
  "completedTime": "2023-11-07T05:31:56Z",
  "evaluationDataset": "<string>",
  "evalAutoCarveout": true,
  "state": "JOB_STATE_UNSPECIFIED",
  "status": {
    "code": "OK",
    "message": "<string>"
  },
  "createdBy": "<string>",
  "trainingConfig": {
    "outputModel": "<string>",
    "baseModel": "<string>",
    "warmStartFrom": "<string>",
    "jinjaTemplate": "<string>",
    "learningRate": 123,
    "maxContextLength": 123,
    "loraRank": 123,
    "region": "REGION_UNSPECIFIED",
    "epochs": 123,
    "batchSize": 123,
    "gradientAccumulationSteps": 123,
    "learningRateWarmupSteps": 123,
    "batchSizeSamples": 123,
    "optimizerWeightDecay": 123
  },
  "wandbConfig": {
    "enabled": true,
    "apiKey": "<string>",
    "project": "<string>",
    "entity": "<string>",
    "runId": "<string>",
    "url": "<string>"
  },
  "awsS3Config": {
    "credentialsSecret": "<string>",
    "iamRoleArn": "<string>"
  },
  "outputStats": "<string>",
  "jobProgress": {
    "percent": 123,
    "epoch": 123,
    "totalInputRequests": 123,
    "totalProcessedRequests": 123,
    "successfullyProcessedRequests": 123,
    "failedRequests": 123,
    "outputRows": 123,
    "inputTokens": 123,
    "outputTokens": 123,
    "cachedInputTokenCount": 123
  },
  "inferenceParameters": {
    "maxOutputTokens": 123,
    "temperature": 123,
    "topP": 123,
    "responseCandidatesCount": 123,
    "extraBody": "<string>",
    "topK": 123
  },
  "chunkSize": 123,
  "outputMetrics": "<string>",
  "mcpServer": "<string>",
  "nodeCount": 123,
  "lossConfig": {
    "method": "METHOD_UNSPECIFIED",
    "klBeta": 123
  },
  "trainerLogsSignedUrl": "<string>",
  "acceleratorSeconds": {},
  "maxConcurrentRollouts": 123,
  "maxConcurrentEvaluations": 123
}

Authorizations

Authorization
string
header
required

Bearer authentication using your Fireworks API key. Format: Bearer <API_KEY>
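
For illustration, assuming the key is exported in an environment variable called FIREWORKS_API_KEY (an arbitrary name chosen here, not required by the API), the header line of a curl call would look like:

  --header "Authorization: Bearer $FIREWORKS_API_KEY"

This is only the header fragment; the full request shape is shown in the example at the top of this page.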

Path Parameters

account_id
string
required

The account ID.

Query Parameters

reinforcementFineTuningJobId
string

The ID of the reinforcement fine-tuning job. A random UUID is generated if not specified.
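
For example, to create the job with a caller-chosen ID instead of an auto-generated UUID, append the query parameter to the URL (the ID value my-rft-job-001 and the minimal body below are purely illustrative):

curl --request POST \
  --url 'https://api.fireworks.ai/v1/accounts/{account_id}/reinforcementFineTuningJobs?reinforcementFineTuningJobId=my-rft-job-001' \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{"dataset": "<string>", "evaluator": "<string>"}'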

Body

application/json
dataset
string
required

The name of the dataset used for training.

evaluator
string
required

The evaluator resource name to use for the reinforcement fine-tuning job.

displayName
string
evaluationDataset
string

The name of a separate dataset to use for evaluation.

evalAutoCarveout
boolean

Whether to auto-carve the dataset for eval.

trainingConfig
object

Common training configurations (BaseTrainingConfig): configuration fields shared across different training job types.
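
A sketch of the trainingConfig object using only fields from this schema; the placeholder names and hyperparameter values are illustrative assumptions, not recommendations:

"trainingConfig": {
  "baseModel": "<base model resource name>",
  "outputModel": "<output model resource name>",
  "epochs": 1,
  "learningRate": 0.0001,
  "loraRank": 8,
  "maxContextLength": 8192
}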

wandbConfig
object

The Weights & Biases team/user account for logging training progress.
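
A sketch of the wandbConfig object, assuming an existing Weights & Biases project; every value is a placeholder:

"wandbConfig": {
  "enabled": true,
  "apiKey": "<wandb api key>",
  "entity": "<wandb team or username>",
  "project": "<wandb project name>"
}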

awsS3Config
object

The AWS configuration for S3 dataset access.
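
A sketch of the awsS3Config object. The schema exposes credentialsSecret and iamRoleArn; which one applies depends on how your S3 access is set up, and the ARN below is a made-up example:

"awsS3Config": {
  "iamRoleArn": "arn:aws:iam::123456789012:role/fireworks-s3-access"
}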

inferenceParameters
object

RFT inference parameters.
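
A sketch of the inferenceParameters object; the values shown are illustrative only:

"inferenceParameters": {
  "maxOutputTokens": 1024,
  "temperature": 0.7,
  "topP": 1,
  "topK": 40,
  "responseCandidatesCount": 4
}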

chunkSize
integer<int32>

The chunk size used when chunking rollout data. Defaults to 200; chunking is enabled when the dataset size exceeds 300. Valid range is 1-10,000.

mcpServer
string
nodeCount
integer<int32>

The number of nodes to use for the fine-tuning job. If not specified, the default is 1.

lossConfig
object

Reinforcement learning loss method + hyperparameters for the underlying trainers.

maxConcurrentRollouts
integer<int32>

Maximum number of concurrent rollouts during the RFT job.

maxConcurrentEvaluations
integer<int32>

Maximum number of concurrent evaluations during the RFT job.
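
Putting the body together, a minimal request needs only the two required fields, dataset and evaluator. The sketch below also sets a displayName; the resource name formats are assumptions modeled on the accounts/{account_id}/... path style used by this API, so confirm the exact formats for your account:

curl --request POST \
  --url https://api.fireworks.ai/v1/accounts/{account_id}/reinforcementFineTuningJobs \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{
    "dataset": "accounts/{account_id}/datasets/<dataset_id>",
    "evaluator": "accounts/{account_id}/evaluators/<evaluator_id>",
    "displayName": "my-rft-job"
  }'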

Response

200 - application/json

A successful response.
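
For a quick look at the fields most often checked after creation, the response can be filtered with jq (assuming jq is installed and the response has been saved to response.json, a file name chosen here for illustration):

jq '{name, state, createTime}' response.json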

dataset
string
required

The name of the dataset used for training.

evaluator
string
required

The evaluator resource name to use for the reinforcement fine-tuning job.

name
string
displayName
string
createTime
string<date-time>
completedTime
string<date-time>

The completed time for the reinforcement fine-tuning job.

evaluationDataset
string

The name of a separate dataset to use for evaluation.

evalAutoCarveout
boolean

Whether to auto-carve the dataset for eval.

state
enum<string>
default:JOB_STATE_UNSPECIFIED

JobState represents the state an asynchronous job can be in.

  • JOB_STATE_PAUSED: Job is paused, typically due to account suspension or manual intervention.
Available options:
JOB_STATE_UNSPECIFIED,
JOB_STATE_CREATING,
JOB_STATE_RUNNING,
JOB_STATE_COMPLETED,
JOB_STATE_FAILED,
JOB_STATE_CANCELLED,
JOB_STATE_DELETING,
JOB_STATE_WRITING_RESULTS,
JOB_STATE_VALIDATING,
JOB_STATE_DELETING_CLEANING_UP,
JOB_STATE_PENDING,
JOB_STATE_EXPIRED,
JOB_STATE_RE_QUEUEING,
JOB_STATE_CREATING_INPUT_DATASET,
JOB_STATE_IDLE,
JOB_STATE_CANCELLING,
JOB_STATE_EARLY_STOPPED,
JOB_STATE_PAUSED
status
object

Mimics google.rpc.Status (https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto).

createdBy
string

The email address of the user who initiated this fine-tuning job.

trainingConfig
object

Common training configurations (BaseTrainingConfig): configuration fields shared across different training job types.

wandbConfig
object

The Weights & Biases team/user account for logging training progress.

awsS3Config
object

The AWS configuration for S3 dataset access.

outputStats
string

The output dataset's aggregated stats for the evaluation job.

jobProgress
object

Job progress.

inferenceParameters
object

RFT inference parameters.

chunkSize
integer<int32>

The chunk size used when chunking rollout data. Defaults to 200; chunking is enabled when the dataset size exceeds 300. Valid range is 1-10,000.

outputMetrics
string
mcpServer
string
nodeCount
integer<int32>

The number of nodes to use for the fine-tuning job. If not specified, the default is 1.

lossConfig
object

Reinforcement learning loss method + hyperparameters for the underlying trainers.

trainerLogsSignedUrl
string

The signed URL for the trainer logs file (stdout/stderr). Only populated if the account has trainer log reading enabled.

acceleratorSeconds
object

Accelerator seconds used by the job, keyed by accelerator type (e.g., "NVIDIA_H100_80GB"). Updated when the job completes or is cancelled.
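
As an illustration only (the number of seconds is made up; the key follows the accelerator-type example given above):

"acceleratorSeconds": {
  "NVIDIA_H100_80GB": 7200
}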

maxConcurrentRollouts
integer<int32>

Maximum number of concurrent rollouts during the RFT job.

maxConcurrentEvaluations
integer<int32>

Maximum number of concurrent evaluations during the RFT job.