Mirror of https://github.com/deepseek-ai/DeepSeek-VL (synced 2024-11-25 13:32:07 +00:00)

Commit 2cd14b3a5d (parent 8d4d9a6ccf): Add Replicate Badge and Web demo
17  .dockerignore  Normal file

@@ -0,0 +1,17 @@
+# The .dockerignore file excludes files from the container build process.
+#
+# https://docs.docker.com/engine/reference/builder/#dockerignore-file
+
+# Exclude Git files
+.git
+.github
+.gitignore
+
+# Exclude Python cache files
+__pycache__
+.mypy_cache
+.pytest_cache
+.ruff_cache
+
+# Exclude Python virtual environment
+/venv
@@ -17,7 +17,9 @@
   <a href="https://huggingface.co/deepseek-ai" target="_blank">
     <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-DeepSeek%20AI-ffc107?color=ffc107&logoColor=white" />
   </a>
+  <a href="https://replicate.com/lucataco/deepseek-vl-7b-base" target="_blank">
+    <img src="https://replicate.com/lucataco/deepseek-vl-7b-base/badge" alt="Replicate"/>
+  </a>
 </div>
19  cog.yaml  Normal file

@@ -0,0 +1,19 @@
+# Configuration for Cog ⚙️
+# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
+
+build:
+  gpu: true
+  python_version: "3.9"
+  python_packages:
+    - "accelerate==0.27.2"
+    - "attrdict==2.0.1"
+    - "einops==0.7.0"
+    - "sentencepiece==0.2.0"
+    - "torch==2.0.1"
+    - "torchvision==0.15.2"
+    - "transformers>=4.38.2"
+    - "timm>=0.9.16"
+    - "hf_transfer==0.1.6"
+
+# predict.py defines how predictions are run on your model
+predict: "predict.py:Predictor"
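This cog.yaml pins the runtime (Python 3.9, a CUDA GPU, the listed packages) and points Cog at the Predictor class in predict.py. Once an image has been built from it, Cog's standard HTTP prediction API can be exercised directly. The snippet below is an editorial sketch, not part of the commit: it assumes the built container is running locally on Cog's default port 5000 and that a local test image demo.jpg exists.

# Sketch (not part of the commit): call a locally running Cog container
# built from this cog.yaml via Cog's standard HTTP API.
# Assumes the container listens on localhost:5000 and demo.jpg exists.
import base64
import requests

with open("demo.jpg", "rb") as f:  # hypothetical local test image
    image_uri = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()

resp = requests.post(
    "http://localhost:5000/predictions",
    json={"input": {"image": image_uri, "prompt": "Describe this image"}},
    timeout=600,
)
resp.raise_for_status()
output = resp.json().get("output")
# An iterator-style output is typically returned as a list of string chunks.
print("".join(output) if isinstance(output, list) else output)

Equivalently, `cog predict -i image=@demo.jpg -i prompt="Describe this image"` runs a one-off prediction without starting the HTTP server yourself.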
82  predict.py  Normal file

@@ -0,0 +1,82 @@
+# Prediction interface for Cog ⚙️
+# https://github.com/replicate/cog/blob/main/docs/python.md
+
+from cog import BasePredictor, Input, Path, ConcatenateIterator
+import os
+import torch
+from threading import Thread
+from deepseek_vl.utils.io import load_pil_images
+from transformers import AutoModelForCausalLM, TextIteratorStreamer
+from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM
+
+# Enable faster download speed
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+MODEL_NAME = "deepseek-ai/deepseek-vl-7b-base"
+CACHE_DIR = "checkpoints"
+
+
+class Predictor(BasePredictor):
+    def setup(self) -> None:
+        """Load the model into memory to make running multiple predictions efficient"""
+        self.vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(
+            MODEL_NAME,
+            cache_dir=CACHE_DIR
+        )
+        self.tokenizer = self.vl_chat_processor.tokenizer
+        vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=torch.bfloat16,
+            cache_dir=CACHE_DIR
+        )
+        self.vl_gpt = vl_gpt.to('cuda')
+
+    @torch.inference_mode()
+    def predict(
+        self,
+        image: Path = Input(description="Input image"),
+        prompt: str = Input(description="Input prompt", default="Describe this image"),
+        max_new_tokens: int = Input(description="Maximum number of tokens to generate", default=512)
+    ) -> ConcatenateIterator[str]:
+        """Run a single prediction on the model"""
+        conversation = [
+            {
+                "role": "User",
+                "content": "<image_placeholder>" + prompt,
+                "images": [str(image)]
+            },
+            {
+                "role": "Assistant",
+                "content": ""
+            }
+        ]
+
+        # load images and prepare for inputs
+        pil_images = load_pil_images(conversation)
+        prepare_inputs = self.vl_chat_processor(
+            conversations=conversation,
+            images=pil_images,
+            force_batchify=True
+        ).to('cuda')
+
+        streamer = TextIteratorStreamer(
+            self.tokenizer, skip_prompt=True, skip_special_tokens=True
+        )
+
+        thread = Thread(
+            target=self.vl_gpt.language_model.generate,
+            kwargs={
+                "inputs_embeds": self.vl_gpt.prepare_inputs_embeds(**prepare_inputs),
+                "attention_mask": prepare_inputs.attention_mask,
+                "pad_token_id": self.tokenizer.eos_token_id,
+                "bos_token_id": self.tokenizer.bos_token_id,
+                "eos_token_id": self.tokenizer.eos_token_id,
+                "max_new_tokens": max_new_tokens,
+                "do_sample": False,
+                "use_cache": True,
+                "streamer": streamer,
+            },
+        )
+        thread.start()
+        for new_token in streamer:
+            yield new_token
+        thread.join()
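The predictor streams tokens by running language_model.generate on a background thread and yielding from a TextIteratorStreamer, which is what lets the Replicate web demo show output incrementally. As a hedged client-side sketch (not part of the commit), the published model can be called with the replicate Python package, using the model name taken from the badge URL:

# Sketch (not part of the commit): call the hosted model with the
# `replicate` Python client. Assumes `pip install replicate` and that
# REPLICATE_API_TOKEN is set in the environment.
import replicate

output = replicate.run(
    "lucataco/deepseek-vl-7b-base",  # model name from the Replicate badge;
                                     # append ":<version>" to pin a version
    input={
        "image": open("demo.jpg", "rb"),  # hypothetical local image
        "prompt": "Describe this image",
        "max_new_tokens": 512,
    },
)
# Streaming predictors return the text as chunks; join them into one string.
print("".join(output))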