diff --git a/run.py b/run.py
index e36712a..e4b6777 100644
--- a/run.py
+++ b/run.py
@@ -42,7 +42,7 @@ def run(args):
 
 def parse_args():
     args = argparse.ArgumentParser()
-    args.add_argument('--backend', type=str, choices=['gpt-4', 'gpt-3.5-turbo'], default='gpt-4')
+    args.add_argument('--backend', type=str, choices=['gpt-4', 'gpt-3.5-turbo', 'gpt-4o'], default='gpt-4')
     args.add_argument('--temperature', type=float, default=0.7)
 
     args.add_argument('--task', type=str, required=True, choices=['game24', 'text', 'crosswords'])
diff --git a/src/tot/models.py b/src/tot/models.py
index b3c4fe0..709085f 100644
--- a/src/tot/models.py
+++ b/src/tot/models.py
@@ -30,10 +30,10 @@ def chatgpt(messages, model="gpt-4", temperature=0.7, max_tokens=1000, n=1, stop
         cnt = min(n, 20)
         n -= cnt
         res = completions_with_backoff(model=model, messages=messages, temperature=temperature, max_tokens=max_tokens, n=cnt, stop=stop)
-        outputs.extend([choice["message"]["content"] for choice in res["choices"]])
+        outputs.extend([choice.message.content for choice in res.choices])
         # log completion tokens
-        completion_tokens += res["usage"]["completion_tokens"]
-        prompt_tokens += res["usage"]["prompt_tokens"]
+        completion_tokens += res.usage.completion_tokens
+        prompt_tokens += res.usage.prompt_tokens
     return outputs
 
 def gpt_usage(backend="gpt-4"):
@@ -42,4 +42,6 @@ def gpt_usage(backend="gpt-4"):
         cost = completion_tokens / 1000 * 0.06 + prompt_tokens / 1000 * 0.03
     elif backend == "gpt-3.5-turbo":
         cost = completion_tokens / 1000 * 0.002 + prompt_tokens / 1000 * 0.0015
+    elif backend == "gpt-4o":
+        cost = completion_tokens / 1000 * 0.01 + prompt_tokens / 1000 * 0.0025
     return {"completion_tokens": completion_tokens, "prompt_tokens": prompt_tokens, "cost": cost}
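
A minimal sketch of the openai>=1.0 client setup that the attribute-style access above assumes: in the v1 Python SDK, completions_with_backoff would return a typed ChatCompletion object, so fields are read as res.choices[0].message.content and res.usage.prompt_tokens instead of dict keys. The client construction and retry decorator below are illustrative assumptions, not part of this diff; the repository's actual completions_with_backoff may differ.

import backoff
import openai
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

@backoff.on_exception(backoff.expo, openai.RateLimitError)
def completions_with_backoff(**kwargs):
    # Returns a ChatCompletion object whose choices/usage are attributes,
    # matching the res.choices / res.usage access introduced above.
    return client.chat.completions.create(**kwargs)

With the gpt-4o rates above ($0.0025 per 1K prompt tokens, $0.01 per 1K completion tokens), a run that consumes 1,000 prompt tokens and 500 completion tokens costs 1.0 * 0.0025 + 0.5 * 0.01 = $0.0075.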