mirror of
https://github.com/deepseek-ai/DeepSeek-VL2
synced 2025-01-22 12:25:32 +00:00
Remove the _clear_cuda_cache() call in forward
This commit is contained in:
parent
a8341f36dd
commit
9789f97283
@ -618,8 +618,6 @@ class DeepseekVLV2ForCausalLM(DeepseekVLV2PreTrainedModel):
|
||||
cache_position=cache_position
|
||||
)
|
||||
|
||||
self._clear_cuda_cache()
|
||||
|
||||
return outputs
|
||||
|
||||
def _clear_cuda_cache(self):
|
||||
|
11
inference.py
11
inference.py
@ -126,9 +126,11 @@ def main(args):
|
||||
# print(key, value.shape, type(value))
|
||||
|
||||
with torch.no_grad():
|
||||
# run image encoder to get the image embeddings
|
||||
# inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
|
||||
|
||||
inputs_embeds = None
|
||||
past_key_values = None
|
||||
|
||||
if args.chunk_size > 0:
|
||||
# incremental_prefilling when using 40G GPU for vl2-small
|
||||
inputs_embeds, past_key_values = vl_gpt.incremental_prefilling(
|
||||
input_ids=prepare_inputs.input_ids,
|
||||
@ -180,6 +182,9 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--model_path", type=str, required=True,
|
||||
default="deepseek-ai/deepseek-vl2",
|
||||
help="model name or local path to the model")
|
||||
parser.add_argument("--chunk_size", type=int, default=512, help="chunk size for the model for prefiiling")
|
||||
parser.add_argument("--chunk_size", type=int, default=-1,
|
||||
help="chunk size for the model for prefiiling. "
|
||||
"When using 40G gpu for vl2-small, set a chunk_size for incremental_prefilling."
|
||||
"Otherwise, default value is -1, which means we do not use incremental_prefilling.")
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
|
Loading…
Reference in New Issue
Block a user