feat: add gpustack model provider (#10158)

2025-12-11 11:56:53 +08:00 · 2024-11-01 17:23:30 +08:00
parent 3c85136279
commit 76b0328eb1
17 changed files with 705 additions and 1 deletions
--- a/api/tests/integration_tests/model_runtime/gpustack/test_llm.py
+++ b/api/tests/integration_tests/model_runtime/gpustack/test_llm.py
@@ -0,0 +1,162 @@
+import os
+from collections.abc import Generator
+
+import pytest
+
+from core.model_runtime.entities.llm_entities import (
+    LLMResult,
+    LLMResultChunk,
+    LLMResultChunkDelta,
+)
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessageTool,
+    SystemPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.gpustack.llm.llm import GPUStackLanguageModel
+
+
+def test_validate_credentials_for_chat_model():
+    model = GPUStackLanguageModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        model.validate_credentials(
+            model="llama-3.2-1b-instruct",
+            credentials={
+                "endpoint_url": "invalid_url",
+                "api_key": "invalid_api_key",
+                "mode": "chat",
+            },
+        )
+
+    model.validate_credentials(
+        model="llama-3.2-1b-instruct",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+            "mode": "chat",
+        },
+    )
+
+
+def test_invoke_completion_model():
+    model = GPUStackLanguageModel()
+
+    response = model.invoke(
+        model="llama-3.2-1b-instruct",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+            "mode": "completion",
+        },
+        prompt_messages=[UserPromptMessage(content="ping")],
+        model_parameters={"temperature": 0.7, "top_p": 1.0, "max_tokens": 10},
+        stop=[],
+        user="abc-123",
+        stream=False,
+    )
+
+    assert isinstance(response, LLMResult)
+    assert len(response.message.content) > 0
+    assert response.usage.total_tokens > 0
+
+
+def test_invoke_chat_model():
+    model = GPUStackLanguageModel()
+
+    response = model.invoke(
+        model="llama-3.2-1b-instruct",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+            "mode": "chat",
+        },
+        prompt_messages=[UserPromptMessage(content="ping")],
+        model_parameters={"temperature": 0.7, "top_p": 1.0, "max_tokens": 10},
+        stop=[],
+        user="abc-123",
+        stream=False,
+    )
+
+    assert isinstance(response, LLMResult)
+    assert len(response.message.content) > 0
+    assert response.usage.total_tokens > 0
+
+
+def test_invoke_stream_chat_model():
+    model = GPUStackLanguageModel()
+
+    response = model.invoke(
+        model="llama-3.2-1b-instruct",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+            "mode": "chat",
+        },
+        prompt_messages=[UserPromptMessage(content="Hello World!")],
+        model_parameters={"temperature": 0.7, "top_p": 1.0, "max_tokens": 10},
+        stop=["you"],
+        stream=True,
+        user="abc-123",
+    )
+
+    assert isinstance(response, Generator)
+    for chunk in response:
+        assert isinstance(chunk, LLMResultChunk)
+        assert isinstance(chunk.delta, LLMResultChunkDelta)
+        assert isinstance(chunk.delta.message, AssistantPromptMessage)
+        assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
+
+
+def test_get_num_tokens():
+    model = GPUStackLanguageModel()
+
+    num_tokens = model.get_num_tokens(
+        model="????",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+            "mode": "chat",
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content="You are a helpful AI assistant.",
+            ),
+            UserPromptMessage(content="Hello World!"),
+        ],
+        tools=[
+            PromptMessageTool(
+                name="get_current_weather",
+                description="Get the current weather in a given location",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state e.g. San Francisco, CA",
+                        },
+                        "unit": {"type": "string", "enum": ["c", "f"]},
+                    },
+                    "required": ["location"],
+                },
+            )
+        ],
+    )
+
+    assert isinstance(num_tokens, int)
+    assert num_tokens == 80
+
+    num_tokens = model.get_num_tokens(
+        model="????",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+            "mode": "chat",
+        },
+        prompt_messages=[UserPromptMessage(content="Hello World!")],
+    )
+
+    assert isinstance(num_tokens, int)
+    assert num_tokens == 10