Add support for Llama 3.2 3B and other minor fixes

2025-06-26 18:26:00 +00:00 · 2024-11-16 03:08:30 +00:00 · 2024-11-16 03:08:30 +00:00 · a8c76b3d5c
commit a8c76b3d5c
parent abc75935fd
7 changed files with 46 additions and 21806 deletions
--- a/logs/game24/gpt-4_0.7_naive_cot_sample_100_start900_end1000.json
+++ b/logs/game24/gpt-4_0.7_naive_cot_sample_100_start900_end1000.json
--- a/logs/game24/gpt-4_0.7_naive_standard_sample_100_start900_end1000.json
+++ b/logs/game24/gpt-4_0.7_naive_standard_sample_100_start900_end1000.json
--- a/logs/game24/gpt-4o_0.7_propose5_value1_greedy1_start10_end15.json
+++ b/logs/game24/gpt-4o_0.7_propose5_value1_greedy1_start10_end15.json
@ -1,191 +0,0 @@
-[
-    {
-        "steps": [
-            {
-                "step": 0,
-                "x": "1 1 2 8",
-                "ys": [
-                    ""
-                ],
-                "new_ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n",
-                    "\n",
-                    "1. \\(1 + 1 = 2\\) (left: 2, 2, 8)\n",
-                    "2. \\(1 + 2 = 3\\) (left: 1, 3, 8)\n",
-                    "3. \\(1 + 8 = 9\\) (left: 1, 2, 9)\n",
-                    "4. \\(1 \\times 1 = 1\\) (left: 1, 2, 8)\n",
-                    "5. \\(1 \\times 2 = 2\\) (left: 1, 2, 8)\n",
-                    "6. \\(1 \\times 8 = 8\\) (left: 1, 2, 8)\n",
-                    "7. \\(2 + 8 = 10\\) (left: 1, 1, 10)\n",
-                    "8. \\(2 - 1 = 1\\) (left: 1, 1, 8)\n",
-                    "9. \\(8 - 1 = 7\\) (left: 1, 2, 7)\n",
-                    "10. \\(8 - 2 = 6\\) (left: 1, 1, 6)\n",
-                    "11. \\(8 \\div 1 = 8\\) (left: 1, 2, 8)\n",
-                    "12. \\(8 \\div 2 = 4\\) (left: 1, 1, 4)\n",
-                    "\n",
-                    "These are some of the possible operations you can perform with the given numbers.\n"
-                ],
-                "values": [
-                    0.001,
-                    0.001,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0,
-                    0.001
-                ],
-                "select_new_ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n"
-                ]
-            },
-            {
-                "step": 1,
-                "x": "1 1 2 8",
-                "ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n"
-                ],
-                "new_ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 2 = 3 (left: 1 3 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 8 = 9 (left: 1 2 9)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 2 = 3 (left: 1 3 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 8 = 9 (left: 1 2 9)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n2 + 8 = 10 (left: 1 1 10)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 - 1 = 0 (left: 0 2 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n2 - 1 = 1 (left: 1 1 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n8 - 1 = 7 (left: 1 2 7)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n2 - 1 = 1 (left: 1 1 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n8 - 1 = 7 (left: 1 2 7)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n8 - 2 = 6 (left: 1 1 6)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 * 1 = 1 (left: 1 2 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 * 2 = 2 (left: 1 2 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 * 8 = 8 (left: 1 2 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n2 * 8 = 16 (left: 1 1 16)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 / 1 = 1 (left: 1 2 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n2 / 1 = 2 (left: 1 1 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n8 / 1 = 8 (left: 1 2 8)  \n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n8 / 2 = 4 (left: 1 1 4)\n"
-                ],
-                "values": [
-                    0.0,
-                    0.0,
-                    0.0,
-                    0,
-                    0,
-                    0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0,
-                    0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0
-                ],
-                "select_new_ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n"
-                ]
-            },
-            {
-                "step": 2,
-                "x": "1 1 2 8",
-                "ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n"
-                ],
-                "new_ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n1. \\(2 + 2 = 4\\) (left: 4 8)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n2. \\(2 \\times 2 = 4\\) (left: 4 8)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n3. \\(8 - 2 = 6\\) (left: 2 6)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n4. \\(8 - 2 = 6\\) (left: 2 6)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n5. \\(8 / 2 = 4\\) (left: 2 4)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n6. \\(8 / 2 = 4\\) (left: 2 4)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \n\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nNote: Steps 1 and 2 result in the same state (4 8), and steps 3 and 4 result in the same state (2 6), as do steps 5 and 6, resulting in the state (2 4).\n"
-                ],
-                "values": [
-                    0.001,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0,
-                    0.001
-                ],
-                "select_new_ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n"
-                ]
-            },
-            {
-                "step": 3,
-                "x": "1 1 2 8",
-                "ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n"
-                ],
-                "new_ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\nFor the input `2 2 8`, here are the possible next steps:\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n1. \\(2 + 2 = 4\\) (left: 4 8)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n2. \\(2 \\times 2 = 4\\) (left: 4 8)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n3. \\(8 - 2 = 6\\) (left: 2 6)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n4. \\(8 - 2 = 6\\) (left: 2 6)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n5. \\(8 \\div 2 = 4\\) (left: 2 4)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n6. \\(8 \\div 2 = 4\\) (left: 2 4)\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n\n",
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\nNote: Since the two `2`s are indistinguishable in operations like addition and multiplication, the results are the same, but I've listed them to show all possible pairings.\n"
-                ],
-                "values": [
-                    0,
-                    0.001,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0,
-                    0
-                ],
-                "select_new_ys": [
-                    "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n\n"
-                ]
-            }
-        ],
-        "idx": 10,
-        "ys": [
-            "Here are some possible next steps with the numbers 1, 1, 2, and 8:\n1 + 1 = 2 (left: 2 2 8)  \nHere are the possible next steps for the input `2 2 8`:\n\n"
-        ],
-        "infos": [
-            {
-                "r": 0
-            }
-        ],
-        "usage_so_far": {
-            "completion_tokens": 5961,
-            "prompt_tokens": 12152,
-            "cost": 0.7222199999999999
-        },
-        "total_runtime": 95.51803588000075
-    }
-]
--- a/logs/game24/gpt-4o_0.7_propose5_value1_greedy1_start5_end6.json
+++ b/logs/game24/gpt-4o_0.7_propose5_value1_greedy1_start5_end6.json
@ -1,378 +0,0 @@
-[
-    {
-        "steps": [
-            {
-                "step": 0,
-                "x": "1 1 2 12",
-                "ys": [
-                    ""
-                ],
-                "new_ys": [
-                    "Here are the possible next steps with the numbers 1, 1, 2, and 12:\n",
-                    "\n",
-                    "1. Add 1 and 1:\n",
-                    "   - 1 + 1 = 2 (left: 2, 2, 12)\n",
-                    "\n",
-                    "2. Subtract 1 from 2:\n",
-                    "   - 2 - 1 = 1 (left: 1, 1, 12)\n",
-                    "\n",
-                    "3. Multiply 1 and 1:\n",
-                    "   - 1 * 1 = 1 (left: 1, 2, 12)\n",
-                    "\n",
-                    "4. Add 1 and 2:\n",
-                    "   - 1 + 2 = 3 (left: 1, 3, 12)\n",
-                    "\n",
-                    "5. Subtract 1 from 12:\n",
-                    "   - 12 - 1 = 11 (left: 1, 1, 11)\n",
-                    "\n",
-                    "6. Subtract 2 from 12:\n",
-                    "   - 12 - 2 = 10 (left: 1, 1, 10)\n",
-                    "\n",
-                    "7. Divide 12 by 2:\n",
-                    "   - 12 / 2 = 6 (left: 1, 1, 6)\n",
-                    "\n",
-                    "8. Add 1 and 12:\n",
-                    "   - 1 + 12 = 13 (left: 1, 2, 13)\n",
-                    "\n",
-                    "9. Add 2 and 12:\n",
-                    "   - 2 + 12 = 14 (left: 1, 1, 14)\n",
-                    "\n",
-                    "10. Multiply 2 and 12:\n",
-                    "    - 2 * 12 = 24 (left: 1, 1, 24)\n",
-                    "\n",
-                    "These are the possible operations you can perform with the numbers 1, 1, 2, and 12.\n"
-                ],
-                "values": [
-                    0.0,
-                    0.001,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001,
-                    0.0,
-                    0,
-                    0.0,
-                    0.0,
-                    0,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001
-                ],
-                "select_new_ys": [
-                    "\n"
-                ]
-            },
-            {
-                "step": 1,
-                "x": "1 1 2 12",
-                "ys": [
-                    "\n"
-                ],
-                "new_ys": [
-                    "\nTo generate possible next steps, follow a similar process as the example and apply basic arithmetic operations to pairs of numbers. Here's how you can do it:\n",
-                    "\n\n",
-                    "\nGiven a list of numbers, you can select any two numbers and apply one of the following operations: addition, subtraction, multiplication, or division (if divisible). After applying an operation, replace the two numbers with the result, and list the remaining numbers along with the result.\n",
-                    "\n\n",
-                    "\nLet's illustrate this with a new input:\n",
-                    "\n\n",
-                    "\nInput: 3 5 9 15\n",
-                    "\n\n",
-                    "\nPossible next steps:\n",
-                    "\n\n",
-                    "\n1. **3 + 5 = 8** (left: 8 9 15)\n",
-                    "\n2. **3 - 5 = -2** (left: -2 9 15)\n",
-                    "\n3. **5 - 3 = 2** (left: 2 9 15)\n",
-                    "\n4. **3 * 5 = 15** (left: 9 15 15)\n",
-                    "\n5. **9 + 3 = 12** (left: 5 12 15)\n",
-                    "\n6. **9 - 3 = 6** (left: 5 6 15)\n",
-                    "\n7. **3 * 9 = 27** (left: 5 15 27)\n",
-                    "\n8. **15 + 3 = 18** (left: 5 9 18)\n",
-                    "\n9. **15 - 3 = 12** (left: 5 9 12)\n",
-                    "\n10. **3 * 15 = 45** (left: 5 9 45)\n",
-                    "\n11. **5 + 9 = 14** (left: 3 14 15)\n",
-                    "\n12. **9 - 5 = 4** (left: 3 4 15)\n",
-                    "\n13. **5 * 9 = 45** (left: 3 15 45)\n",
-                    "\n14. **15 + 5 = 20** (left: 3 9 20)\n",
-                    "\n15. **15 - 5 = 10** (left: 3 9 10)\n",
-                    "\n16. **5 * 15 = 75** (left: 3 9 75)\n",
-                    "\n17. **9 + 15 = 24** (left: 3 5 24)\n",
-                    "\n18. **15 - 9 = 6** (left: 3 5 6)\n",
-                    "\n19. **9 * 15 = 135** (left: 3 5 135)\n",
-                    "\n\n",
-                    "\nThese are just some of the possible operations. You can continue to explore other combinations and operations based on the context and goals of your problem.\n"
-                ],
-                "values": [
-                    0.0,
-                    0.001,
-                    0.001,
-                    0,
-                    0.001,
-                    0,
-                    0.001,
-                    0,
-                    20.0,
-                    0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0.0,
-                    0,
-                    0.001
-                ],
-                "select_new_ys": [
-                    "\nPossible next steps:\n"
-                ]
-            },
-            {
-                "step": 2,
-                "x": "1 1 2 12",
-                "ys": [
-                    "\nPossible next steps:\n"
-                ],
-                "new_ys": [
-                    "\nPossible next steps:\nTo generate the possible next steps from a given set of numbers, you can consider each pair of numbers and apply the basic arithmetic operations: addition, subtraction, multiplication, and division (when applicable). Here are the steps for generating possible next steps with the numbers you provided:\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\nInput: 2 8 8 14\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\n1. **Addition:**\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\n   - 2 + 8 = 10 (remaining: 8 10 14)\n",
-                    "\nPossible next steps:\n   - 2 + 8 (second 8) = 10 (remaining: 8 10 14)\n",
-                    "\nPossible next steps:\n   - 2 + 14 = 16 (remaining: 8 8 16)\n",
-                    "\nPossible next steps:\n   - 8 + 8 = 16 (remaining: 2 14 16)\n",
-                    "\nPossible next steps:\n   - 8 + 14 = 22 (remaining: 2 8 22)\n",
-                    "\nPossible next steps:\n   - 8 (second 8) + 14 = 22 (remaining: 2 8 22)\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\n2. **Subtraction:**\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\n   - 8 - 2 = 6 (remaining: 6 8 14)\n",
-                    "\nPossible next steps:\n   - 8 (second 8) - 2 = 6 (remaining: 6 8 14)\n",
-                    "\nPossible next steps:\n   - 14 - 2 = 12 (remaining: 8 8 12)\n",
-                    "\nPossible next steps:\n   - 14 - 8 = 6 (remaining: 2 8 6)\n",
-                    "\nPossible next steps:\n   - 14 - 8 (second 8) = 6 (remaining: 2 8 6)\n",
-                    "\nPossible next steps:\n   - 8 - 8 = 0 (remaining: 2 14 0)\n",
-                    "\nPossible next steps:\n   - 2 - 8 = -6 (remaining: -6 8 14)\n",
-                    "\nPossible next steps:\n   - 8 - 14 = -6 (remaining: 2 8 -6)\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\n3. **Multiplication:**\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\n   - 2 * 8 = 16 (remaining: 8 14 16)\n",
-                    "\nPossible next steps:\n   - 2 * 8 (second 8) = 16 (remaining: 8 14 16)\n",
-                    "\nPossible next steps:\n   - 2 * 14 = 28 (remaining: 8 8 28)\n",
-                    "\nPossible next steps:\n   - 8 * 8 = 64 (remaining: 2 14 64)\n",
-                    "\nPossible next steps:\n   - 8 * 14 = 112 (remaining: 2 8 112)\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\n4. **Division** (only when the result is an integer):\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\n   - 8 / 2 = 4 (remaining: 4 8 14)\n",
-                    "\nPossible next steps:\n   - 14 / 2 = 7 (remaining: 8 8 7)\n",
-                    "\nPossible next steps:\n   - 8 / 8 = 1 (remaining: 2 14 1)\n",
-                    "\nPossible next steps:\n   - 14 / 8 = 1.75 (not an integer, so not considered)\n",
-                    "\nPossible next steps:\n\n",
-                    "\nPossible next steps:\nThese are the possible next steps you can take from the given set of numbers. Each operation reduces the set of numbers by one while adding the result of the operation back to the list.\n"
-                ],
-                "values": [
-                    0.001,
-                    20.0,
-                    0.001,
-                    0,
-                    0.001,
-                    0,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0,
-                    0.001,
-                    0,
-                    0.0,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0,
-                    0.001,
-                    0,
-                    0.001,
-                    0.0,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0,
-                    0.001,
-                    0,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0,
-                    0.001
-                ],
-                "select_new_ys": [
-                    "\nPossible next steps:\n\n"
-                ]
-            },
-            {
-                "step": 3,
-                "x": "1 1 2 12",
-                "ys": [
-                    "\nPossible next steps:\n\n"
-                ],
-                "new_ys": [
-                    "\nPossible next steps:\n\nTo determine the possible next steps, we take each pair of numbers and apply the basic arithmetic operations (addition, subtraction, multiplication, and division where applicable) to see what new set of numbers we can create. Let's consider the set of numbers you provided: \\(2, 8, 8, 14\\).\n",
-                    "\nPossible next steps:\n\n\n",
-                    "\nPossible next steps:\n\n### Current Set: 2, 8, 8, 14\n",
-                    "\nPossible next steps:\n\nWe'll evaluate each pair of numbers:\n",
-                    "\nPossible next steps:\n\n\n",
-                    "\nPossible next steps:\n\n1. **2 and 8**\n",
-                    "\nPossible next steps:\n\n   - Addition: \\(2 + 8 = 10\\) \u2192 New set: 8, 10, 14\n",
-                    "\nPossible next steps:\n\n   - Subtraction: \\(8 - 2 = 6\\) \u2192 New set: 6, 8, 14\n",
-                    "\nPossible next steps:\n\n   - Multiplication: \\(2 \\times 8 = 16\\) \u2192 New set: 8, 14, 16\n",
-                    "\nPossible next steps:\n\n   - Division: \\(8 \\div 2 = 4\\) \u2192 New set: 4, 8, 14\n",
-                    "\nPossible next steps:\n\n\n",
-                    "\nPossible next steps:\n\n2. **2 and 14**\n",
-                    "\nPossible next steps:\n\n   - Addition: \\(2 + 14 = 16\\) \u2192 New set: 8, 8, 16\n",
-                    "\nPossible next steps:\n\n   - Subtraction: \\(14 - 2 = 12\\) \u2192 New set: 8, 8, 12\n",
-                    "\nPossible next steps:\n\n   - Multiplication: \\(2 \\times 14 = 28\\) \u2192 New set: 8, 8, 28\n",
-                    "\nPossible next steps:\n\n   - Division: \\(14 \\div 2 = 7\\) \u2192 New set: 7, 8, 8\n",
-                    "\nPossible next steps:\n\n\n",
-                    "\nPossible next steps:\n\n3. **8 and 8**\n",
-                    "\nPossible next steps:\n\n   - Addition: \\(8 + 8 = 16\\) \u2192 New set: 2, 14, 16\n",
-                    "\nPossible next steps:\n\n   - Subtraction: \\(8 - 8 = 0\\) \u2192 New set: 2, 0, 14\n",
-                    "\nPossible next steps:\n\n   - Multiplication: \\(8 \\times 8 = 64\\) \u2192 New set: 2, 14, 64\n",
-                    "\nPossible next steps:\n\n   - Division: \\(8 \\div 8 = 1\\) \u2192 New set: 2, 14, 1\n",
-                    "\nPossible next steps:\n\n\n",
-                    "\nPossible next steps:\n\n4. **8 and 14**\n",
-                    "\nPossible next steps:\n\n   - Addition: \\(8 + 14 = 22\\) \u2192 New set: 2, 8, 22\n",
-                    "\nPossible next steps:\n\n   - Subtraction: \\(14 - 8 = 6\\) \u2192 New set: 2, 6, 8\n",
-                    "\nPossible next steps:\n\n   - Multiplication: \\(8 \\times 14 = 112\\) \u2192 New set: 2, 8, 112\n",
-                    "\nPossible next steps:\n\n   - Division: \\(14 \\div 8 = 1.75\\) (not an integer, usually not considered unless fractional results are allowed)\n",
-                    "\nPossible next steps:\n\n\n",
-                    "\nPossible next steps:\n\n### Summary of Possible Next Steps:\n",
-                    "\nPossible next steps:\n\n- New set: 8, 10, 14\n",
-                    "\nPossible next steps:\n\n- New set: 6, 8, 14\n",
-                    "\nPossible next steps:\n\n- New set: 8, 14, 16\n",
-                    "\nPossible next steps:\n\n- New set: 4, 8, 14\n",
-                    "\nPossible next steps:\n\n- New set: 8, 8, 16\n",
-                    "\nPossible next steps:\n\n- New set: 8, 8, 12\n",
-                    "\nPossible next steps:\n\n- New set: 8, 8, 28\n",
-                    "\nPossible next steps:\n\n- New set: 7, 8, 8\n",
-                    "\nPossible next steps:\n\n- New set: 2, 14, 16\n",
-                    "\nPossible next steps:\n\n- New set: 2, 0, 14\n",
-                    "\nPossible next steps:\n\n- New set: 2, 14, 64\n",
-                    "\nPossible next steps:\n\n- New set: 2, 14, 1\n",
-                    "\nPossible next steps:\n\n- New set: 2, 8, 22\n",
-                    "\nPossible next steps:\n\n- New set: 2, 6, 8\n",
-                    "\nPossible next steps:\n\n- New set: 2, 8, 112\n",
-                    "\nPossible next steps:\n\n\n",
-                    "\nPossible next steps:\n\nThese are all the possible next steps using the given numbers.\n"
-                ],
-                "values": [
-                    0.001,
-                    20.0,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.0,
-                    0,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0,
-                    0.0,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0.001,
-                    0,
-                    0.001
-                ],
-                "select_new_ys": [
-                    "\nPossible next steps:\n\n\n"
-                ]
-            }
-        ],
-        "idx": 5,
-        "ys": [
-            "\nPossible next steps:\n\n\n"
-        ],
-        "infos": [
-            {
-                "r": 0
-            }
-        ],
-        "usage_so_far": {
-            "completion_tokens": 11939,
-            "prompt_tokens": 39972,
-            "cost": 1.9155
-        }
-    }
-]
--- a/run.py
+++ b/run.py
@ -7,10 +7,23 @@ from src.tot.tasks import get_task
 from src.tot.methods.bfs import solve, naive_solve
 from src.tot.models import gpt_usage

+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+
 def run(args):
    '''
    main run function
    '''
+    # Load the non-GPT model once in this driver function (for now) to avoid reloading it later on.
+    if args.backend == 'llama':
+        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+        model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+    else:
+        model = None
+        tokenizer = None
+
+    #set up
    task = get_task(args.task)
    logs, cnt_avg, cnt_any = [], 0, 0
    if args.naive_run:
@ -19,20 +32,22 @@ def run(args):
        file = f'./logs/{args.task}/{args.backend}_{args.temperature}_{args.method_generate}{args.n_generate_sample}_{args.method_evaluate}{args.n_evaluate_sample}_{args.method_select}{args.n_select_sample}_start{args.task_start_index}_end{args.task_end_index}.json'
    os.makedirs(os.path.dirname(file), exist_ok=True)

+    #run the specified range of tasks
    for i in range(args.task_start_index, args.task_end_index):

        # solve
        start_timer = time.perf_counter()
        if args.naive_run:
-            ys, info = naive_solve(args, task, i) 
+            ys, info = naive_solve(args, task, i, model, tokenizer) 
        else:
-            ys, info = solve(args, task, i)
+            ys, info = solve(args, task, i, model, tokenizer)
+
        runtime = time.perf_counter()-start_timer
        print(runtime)

        # log
        infos = [task.test_output(i, y) for y in ys]
-        info.update({'idx': i, 'ys': ys, 'infos': infos, 'usage_so_far': gpt_usage(args.backend), 'total_runtime': runtime})
+        info.update({'idx': i, 'ys': ys, 'infos': infos, 'usage_so_far (gpt only)': gpt_usage(args.backend), 'total_runtime': runtime})
        logs.append(info)
        with open(file, 'w') as f:
            json.dump(logs, f, indent=4)
@ -55,7 +70,7 @@ def parse_args():
    args = argparse.ArgumentParser()

    #what model to use
-    args.add_argument('--backend', type=str, choices=['gpt-4o', 'gpt-4o-mini', 'other'], default='gpt-4o')
+    args.add_argument('--backend', type=str, choices=['gpt-4o', 'llama'], default='gpt-4o')

    #what temperature to use
    args.add_argument('--temperature', type=float, default=0.0)
--- a/src/tot/methods/bfs.py
+++ b/src/tot/methods/bfs.py
@ -49,9 +49,9 @@ def get_samples(task, x, y, n_generate_sample, prompt_sample, stop):
    samples = inference_model(prompt, n=n_generate_sample, stop=stop)
    return [y + _ for _ in samples]

-def solve(args, task, idx, to_print=True):
+def solve(args, task, idx, model, tokenizer, to_print=True):
    global inference_model
-    inference_model = partial(inference_model, model=args.backend, temperature=args.temperature)
+    inference_model = partial(inference_model, model=model, tokenizer=tokenizer, temperature=args.temperature)
    # print(inference_model)
    x = task.get_input(idx)  # input
    ys = ['']  # current output candidates
--- a/src/tot/models.py
+++ b/src/tot/models.py
@ -2,7 +2,8 @@ import os
 import openai
 from openai import OpenAI
 import backoff 
-from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+import transformers

 completion_tokens = prompt_tokens = 0

@ -21,45 +22,42 @@ if api_base != "":
 ### Model Inference ###
 #######################

-def inference_model(prompt, model="gpt-4o", temperature=0.7, max_tokens=1000, n=1, stop=None, vllm=False, quant=False) -> list:
+def inference_model(prompt, model, tokenizer, temperature=0.7, max_tokens=1000, n=5, stop=None) -> list:
    '''
    Driver function for model inference.
    '''
-    if model == "llama_3.2" and vllm: #will change this later
-        return llama_32(prompt, quant, vllm, temperature, max_tokens, n, stop)
+    if model: #will modify this later to include support for other variations
+        return hf_model(model, tokenizer, prompt, temperature, max_tokens, n, stop)
    else:
+        model = "gpt-4o"
        messages = [{"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}]
        return chatgpt(model=model, messages=messages, temperature=temperature, max_tokens=max_tokens, n=n, stop=stop)

-def llama_32(prompt, temperature, max_tokens, n, stop, quant=None, vllm=None): #will add vllm support later
-    '''
-    Use llama3.2 for inference
-    '''
-    # if quant:
-    #     tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8")
-    #     model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8")
-    # else:
-    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B")
-    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B")
-    
-    inputs = tokenizer(prompt, return_tensors="pt")
-    outputs = hf_model(model, inputs, temperature, max_tokens, n, stop)
-    
-    return outputs
-
-def hf_model(model, input_tokens, temperature=0.7, max_tokens=1000, n=1, stop=None):
+def hf_model(model, tokenizer, prompt, temperature=0.7, max_tokens=1000, n=5, stop=None):
    """
    Given a model (Huggingface) and input tokens, generate an output
    """
    outputs = []

+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+    #tokenize inputs
+    inputs = tokenizer(prompt, return_tensors="pt")
+    inputs = inputs.to(device)
+    # print(inputs)
+    model.to(device)
+
    while n > 0:
        cnt = min(n, 20) 
        n -= cnt
-        outputs = model.generate(**input_tokens, temperature=temperature, max_new_tokens=max_tokens, num_return_sequences=cnt) #might add stopping criteria depending on heuristics experimentation
-        #need to take a look at the specific output format once i get access to the gated repo
-        #need to outputs.extend()
+
+        #actual generation
+        out = model.generate(**inputs, temperature=temperature, max_new_tokens=max_tokens, num_return_sequences=cnt) #might add stopping criteria depending on heuristics experimentation
+
+        for o in out:
+            string_answer = tokenizer.decode(o)
+            outputs.extend([string_answer])

    return outputs

@ -67,7 +65,7 @@ def hf_model(model, input_tokens, temperature=0.7, max_tokens=1000, n=1, stop=No
 def completions_with_backoff(**kwargs):
    return client.chat.completions.create(**kwargs)

-def chatgpt(messages, model="gpt-4", temperature=0.7, max_tokens=1000, n=1, stop=None) -> list:
+def chatgpt(model, messages, temperature=0.7, max_tokens=1000, n=5, stop=None) -> list:
    global completion_tokens, prompt_tokens
    outputs = []
    client = OpenAI()
@ -76,9 +74,8 @@ def chatgpt(messages, model="gpt-4", temperature=0.7, max_tokens=1000, n=1, stop
        n -= cnt

        res = client.chat.completions.create(model=model, messages=messages, temperature=temperature, n=cnt, stop=stop)
-        res_answer = res.choices[0].message.content
-        # print(res_answer)
-        outputs.extend([res_answer]) #will add in support for n > 1 hyperparam later
+        res_answer = res.choices[0].message.content #TODO(review): only choices[0] is read here; the API is documented to return one choice per requested sample (n), so confirm whether the remaining choices should be collected too
+        outputs.extend([res_answer]) 

        # log completion tokens
        completion_tokens += res.usage.completion_tokens