diff --git a/README.md b/README.md
index d362e9c..350dcac 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Mini-Omni is an open-source multimodal large language model that can **hear, tal
 
 ✅ **Talking while thinking**, with the ability to generate text and audio at the same time.
 
-✅ **Streaming audio outupt** capabilities.
+✅ **Streaming audio output** capabilities.
 
 ✅ With "Audio-to-Text" and "Audio-to-Audio" **batch inference** to further boost the performance.
 
diff --git a/inference.py b/inference.py
index 4d721d0..d184925 100644
--- a/inference.py
+++ b/inference.py
@@ -399,7 +399,7 @@ class OmniInference:
         model = self.model
 
         with self.fabric.init_tensor():
-            model.set_kv_cache(batch_size=2)
+            model.set_kv_cache(batch_size=2,device=self.device)
 
         mel, leng = load_audio(audio_path)
         audio_feature, input_ids = get_input_ids_whisper_ATBatch(mel, leng, self.whispermodel, self.device)
diff --git a/server.py b/server.py
index 5740613..c6e5d98 100644
--- a/server.py
+++ b/server.py
@@ -46,9 +46,9 @@ def create_app():
     return server.server
 
 
-def serve(ip='0.0.0.0', port=60808):
+def serve(ip='0.0.0.0', port=60808, device='cuda:0'):
 
-    OmniChatServer(ip, port=port, run_app=True)
+    OmniChatServer(ip, port=port,run_app=True, device=device)
 
 
 if __name__ == "__main__":
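
Taken together, these changes thread a `device` argument from the `serve` entry point down to the KV-cache allocation, so the cache is placed on the same device the model runs on instead of the framework default. A minimal usage sketch follows; the `serve(ip, port, device)` signature is taken from the `server.py` hunk above, while the specific device string `"cuda:1"` and the assumption that `OmniChatServer` forwards `device` to `OmniInference` are illustrative, not confirmed by this diff:

```python
# Hedged usage sketch: start the chat server pinned to a chosen GPU.
# Assumes server.py can be imported and exposes serve(ip, port, device)
# as shown in the diff above; "cuda:1" is a hypothetical choice.
from server import serve

if __name__ == "__main__":
    # With the patched set_kv_cache(batch_size=2, device=...), the KV cache
    # is allocated on the same device as the rest of the inference stack.
    serve(ip="0.0.0.0", port=60808, device="cuda:1")
```

Keeping `device='cuda:0'` as the default preserves the previous single-GPU behavior, while multi-GPU hosts can now point the server at another card without editing the inference code.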