From 3963f912c0e2f09e226aaea21a3f5029d0a30edf Mon Sep 17 00:00:00 2001
From: Daya Guo <40300434+guoday@users.noreply.github.com>
Date: Fri, 29 Dec 2023 00:08:38 +0800
Subject: [PATCH] Update README.md

---
 Evaluation/PAL-Math/README.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/Evaluation/PAL-Math/README.md b/Evaluation/PAL-Math/README.md
index 96285d0..a70d0f5 100644
--- a/Evaluation/PAL-Math/README.md
+++ b/Evaluation/PAL-Math/README.md
@@ -48,16 +48,16 @@ We report experimental results here for mathematical reasoning tasks by using py
 | Model | Size | GSM8k | MATH | GSM-Hard | SVAMP | TabMWP | ASDiv | MAWPS | Avg |
 | -------------- | ---- | ----- | ----- | -------- | ----- | ------ | ----- | ----- | ----- |
-| CodeShell | 7B | 17.0% | 9.1% | 18.2% | 45.6% | 29.6% | 46.6% | 56.8% | 31.8% |
-| CodeGeex-2 | 7B | 23.6% | 9.6% | 22.4% | 48.0% | 47.2% | 46.9% | 66.0% | 37.7% |
-| StarCoder-Base | 16B | 27.3% | 11.5% | 24.2% | 44.0% | 45.6% | 54.9% | 73.4% | 40.1% |
-| CodeLLama-Base | 7B | 36.4% | 12.3% | 29.7% | 57.6% | 58.4% | 59.6% | 82.6% | 48.0% |
-| CodeLLama-Base | 13B | 44.2% | 15.5% | 42.4% | 65.6% | 61.6% | 65.3% | 85.3% | 54.3% |
-| CodeLLama-Base | 34B | 58.2% | 22.1% | **55.2%** | 77.2% | 69.6% | 70.0% | 92.8% | 63.6% |
+| CodeShell | 7B | 15.8% | 8.6% | 17.3% | 35.5% | 28.2% | 44.4% | 59.8% | 29.9% |
+| CodeGeex-2 | 7B | 22.2% | 9.7% | 23.6% | 39.0% | 44.6% | 48.5% | 66.0% | 36.2% |
+| StarCoder-Base | 16B | 23.4% | 10.3% | 23.0% | 42.4% | 45.0% | 54.9% | 81.1% | 40.0% |
+| CodeLLama-Base | 7B | 31.2% | 12.1% | 30.2% | 54.2% | 52.9% | 59.6% | 82.6% | 46.1% |
+| CodeLLama-Base | 13B | 43.1% | 14.4% | 40.2% | 59.2% | 60.3% | 63.6% | 85.3% | 52.3% |
+| CodeLLama-Base | 34B | 58.2% | 21.2% | 51.8% | 70.3% | 69.8% | 70.7% | 91.8% | 62.0% |
 | | | | | | | | | | |
-| DeepSeek-Coder-Base | 1.3B | 15.8% | 16.3% | 14.5% | 38.4% | 28.8% | 51.3% | 66.0% | 33.0% |
-| DeepSeek-Coder-MQA-Base | 5.7B | 44.8% | 25.4% | 40.6% | 56.8% | 62.4% | 66.8% | 84.2% | 54.4% |
-| DeepSeek-Coder-Base | 6.7B | 46.1% | 25.6% | 40.0% | 67.2% | 71.2% | 69.0% | 89.2% | 58.3% |
-| DeepSeek-Coder-Base | 33B | **58.2%** | **35.3%** | 54.5% | **78.4%** | **76.8%** | **78.2%** | **94.0%** | **67.9%** |
+| DeepSeek-Coder-Base | 1.3B | 14.6% | 16.8% | 14.5% | 36.7% | 30.0% | 48.2% | 62.3% | 31.9% |
+| DeepSeek-Coder-MQA-Base | 5.7B | 38.8% | 20.0% | 36.8% | 52.5% | 55.9% | 63.9% | 84.8% | 50.4% |
+| DeepSeek-Coder-Base | 6.7B | 43.2% | 19.2% | 40.3% | 58.4% | 67.9% | 67.2% | 87.0% | 54.7% |
+| DeepSeek-Coder-Base | 33B | **60.7%** | **29.1%** | **54.1%** | **71.6%** | **75.3%** | **76.7%** | **93.3%** | **65.8%** |