Mirror of https://github.com/deepseek-ai/DeepSeek-VL2 (synced 2025-01-22 04:15:30 +00:00)
update readme and repo dependency
This commit is contained in:
parent cd4bb743e2
commit 8bde1c1ae1
README.md (19 lines changed)
@@ -124,6 +124,23 @@ conversation = [
     {"role": "<|Assistant|>", "content": ""},
 ]
 
+
+# multiple images/interleaved image-text
+conversation_multi_images = [
+    {
+        "role": "<|User|>",
+        "content": "This is image_1: <image>\n"
+                   "This is image_2: <image>\n"
+                   "This is image_3: <image>\n If I am a vegetarian, what can I cook with these ingredients?",
+        "images": [
+            "images/multi_image_1.png",
+            "images/multi_image_2.jpg",
+            "images/multi_image_3.jpg",
+        ],
+    },
+    {"role": "<|Assistant|>", "content": ""}
+]
+
 # load images and prepare for inputs
 pil_images = load_pil_images(conversation)
 prepare_inputs = vl_chat_processor(
@@ -148,7 +165,7 @@ outputs = vl_gpt.language.generate(
     use_cache=True
 )
 
-answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
+answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=False)
 print(f"{prepare_inputs['sft_format'][0]}", answer)
 ```
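For context, the sketch below shows how the newly added `conversation_multi_images` example plugs into the pipeline that the surrounding README code already uses (the `load_pil_images`, `vl_chat_processor`, `vl_gpt.language.generate`, and `tokenizer.decode` calls visible in the context lines above). The imports, the `deepseek-ai/deepseek-vl2-tiny` checkpoint name, and the model-loading lines are not part of this diff; they are assumptions reconstructed from the repository's usual README example.

```python
# Minimal sketch, assuming the DeepSeek-VL2 package layout and checkpoint name below;
# only the conversation_multi_images block and the final decode line come from this diff.
import torch
from transformers import AutoModelForCausalLM

from deepseek_vl2.models import DeepseekVLV2Processor  # assumed import path
from deepseek_vl2.utils.io import load_pil_images      # assumed import path

model_path = "deepseek-ai/deepseek-vl2-tiny"            # assumed checkpoint name
vl_chat_processor = DeepseekVLV2Processor.from_pretrained(model_path)
tokenizer = vl_chat_processor.tokenizer

vl_gpt = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()

# The multi-image conversation added by this commit: one <image> placeholder in the
# text for each entry in the "images" list.
conversation_multi_images = [
    {
        "role": "<|User|>",
        "content": "This is image_1: <image>\n"
                   "This is image_2: <image>\n"
                   "This is image_3: <image>\n If I am a vegetarian, what can I cook with these ingredients?",
        "images": [
            "images/multi_image_1.png",
            "images/multi_image_2.jpg",
            "images/multi_image_3.jpg",
        ],
    },
    {"role": "<|Assistant|>", "content": ""},
]

# Load the referenced images and batch text + images for the model.
pil_images = load_pil_images(conversation_multi_images)
prepare_inputs = vl_chat_processor(
    conversations=conversation_multi_images,
    images=pil_images,
    force_batchify=True,
    system_prompt="",
).to(vl_gpt.device)

# Encode the images into input embeddings, then generate the answer.
inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
outputs = vl_gpt.language.generate(
    inputs_embeds=inputs_embeds,
    attention_mask=prepare_inputs.attention_mask,
    pad_token_id=tokenizer.eos_token_id,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=512,
    do_sample=False,
    use_cache=True,
)

# skip_special_tokens=False (the change in this commit) keeps special tokens such as
# grounding markers visible in the decoded output.
answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=False)
print(f"{prepare_inputs['sft_format'][0]}", answer)
```

The processor keyword arguments and the `prepare_inputs_embeds` call mirror the single-image example elsewhere in the README; adjust them if the upstream API differs.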
New binary files:
images/multi_image_1.png  (855 KiB)
images/multi_image_2.jpg  (55 KiB)
images/multi_image_3.jpg  (140 KiB)
requirements.txt (the "repo dependency" change):

@@ -1,5 +1,5 @@
 torch==2.0.1
-transformers>=4.38.2
+transformers==4.38.2
 timm>=0.9.16
 accelerate
 sentencepiece
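The change above pins transformers to an exact version (==4.38.2) instead of a minimum (>=4.38.2). As a small, purely illustrative check that is not part of the commit, the installed versions can be compared against these pins before running the README example:

```python
from importlib.metadata import version

# Illustrative only: confirm the environment matches the pins above.
# torch wheels often carry a local tag such as "2.0.1+cu118", hence startswith().
assert version("transformers") == "4.38.2", version("transformers")
assert version("torch").startswith("2.0.1"), version("torch")
print("pinned dependencies look OK")
```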