diff --git a/run.py b/run.py
index e36712a..e4b6777 100644
--- a/run.py
+++ b/run.py
@@ -42,7 +42,7 @@ def run(args):
 
 def parse_args():
     args = argparse.ArgumentParser()
-    args.add_argument('--backend', type=str, choices=['gpt-4', 'gpt-3.5-turbo'], default='gpt-4')
+    args.add_argument('--backend', type=str, choices=['gpt-4', 'gpt-3.5-turbo', 'gpt-4o'], default='gpt-4')
     args.add_argument('--temperature', type=float, default=0.7)
 
     args.add_argument('--task', type=str, required=True, choices=['game24', 'text', 'crosswords'])
diff --git a/src/tot/models.py b/src/tot/models.py
index b3c4fe0..709085f 100644
--- a/src/tot/models.py
+++ b/src/tot/models.py
@@ -30,10 +30,10 @@ def chatgpt(messages, model="gpt-4", temperature=0.7, max_tokens=1000, n=1, stop
         cnt = min(n, 20)
         n -= cnt
         res = completions_with_backoff(model=model, messages=messages, temperature=temperature, max_tokens=max_tokens, n=cnt, stop=stop)
-        outputs.extend([choice["message"]["content"] for choice in res["choices"]])
+        outputs.extend([choice.message.content for choice in res.choices])
         # log completion tokens
-        completion_tokens += res["usage"]["completion_tokens"]
-        prompt_tokens += res["usage"]["prompt_tokens"]
+        completion_tokens += res.usage.completion_tokens
+        prompt_tokens += res.usage.prompt_tokens
     return outputs
 
 def gpt_usage(backend="gpt-4"):
@@ -42,4 +42,6 @@ def gpt_usage(backend="gpt-4"):
         cost = completion_tokens / 1000 * 0.06 + prompt_tokens / 1000 * 0.03
     elif backend == "gpt-3.5-turbo":
         cost = completion_tokens / 1000 * 0.002 + prompt_tokens / 1000 * 0.0015
+    elif backend == "gpt-4o":
+        cost = completion_tokens / 1000 * 0.01 + prompt_tokens / 1000 * 0.0025
     return {"completion_tokens": completion_tokens, "prompt_tokens": prompt_tokens, "cost": cost}
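
A minimal sketch of the openai>=1.0 client setup that the attribute-style access above assumes: in the v1 Python SDK, completions_with_backoff would return a typed ChatCompletion object, so fields are read as res.choices[0].message.content and res.usage.prompt_tokens instead of dict keys. The client construction and retry decorator below are illustrative assumptions, not part of this diff; the repository's actual completions_with_backoff may differ.

import backoff
import openai
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

@backoff.on_exception(backoff.expo, openai.RateLimitError)
def completions_with_backoff(**kwargs):
    # Returns a ChatCompletion object whose choices/usage are attributes,
    # matching the res.choices / res.usage access introduced above.
    return client.chat.completions.create(**kwargs)

With the gpt-4o rates above ($0.0025 per 1K prompt tokens, $0.01 per 1K completion tokens), a run that consumes 1,000 prompt tokens and 500 completion tokens costs 1.0 * 0.0025 + 0.5 * 0.01 = $0.0075.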