Mirror of http://112.124.100.131/huang.ze/ebiz-dify-ai.git (synced 2025-12-10 03:16:51 +08:00)
feat: add api-based extension & external data tool & moderation backend (#1403)
Co-authored-by: takatost <takatost@gmail.com>
api/core/moderation/__init__.py (new file, 0 lines)
api/core/moderation/api/__builtin__ (new file, 1 line)
@@ -0,0 +1 @@
3
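(The lone integer in each __builtin__ file is not explained by this diff; judging from the 3/2/1 pattern across the three builtin moderation types, it most likely encodes their display order, but that is an inference.)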
api/core/moderation/api/__init__.py (new file, 0 lines)
api/core/moderation/api/api.py (new file, 88 lines)
@@ -0,0 +1,88 @@
from pydantic import BaseModel

from core.moderation.base import Moderation, ModerationInputsResult, ModerationOutputsResult, ModerationAction
from core.extension.api_based_extension_requestor import APIBasedExtensionRequestor, APIBasedExtensionPoint
from core.helper.encrypter import decrypt_token
from extensions.ext_database import db
from models.api_based_extension import APIBasedExtension


class ModerationInputParams(BaseModel):
    app_id: str = ""
    inputs: dict = {}
    query: str = ""


class ModerationOutputParams(BaseModel):
    app_id: str = ""
    text: str


class ApiModeration(Moderation):
    name: str = "api"

    @classmethod
    def validate_config(cls, tenant_id: str, config: dict) -> None:
        """
        Validate the incoming form config data.

        :param tenant_id: the id of the workspace
        :param config: the form config data
        :return:
        """
        cls._validate_inputs_and_outputs_config(config, False)

        api_based_extension_id = config.get("api_based_extension_id")
        if not api_based_extension_id:
            raise ValueError("api_based_extension_id is required")

        extension = cls._get_api_based_extension(tenant_id, api_based_extension_id)
        if not extension:
            raise ValueError("API-based Extension not found. Please check it again.")

    def moderation_for_inputs(self, inputs: dict, query: str = "") -> ModerationInputsResult:
        flagged = False
        preset_response = ""

        if self.config['inputs_config']['enabled']:
            params = ModerationInputParams(
                app_id=self.app_id,
                inputs=inputs,
                query=query
            )

            result = self._get_config_by_requestor(APIBasedExtensionPoint.APP_MODERATION_INPUT, params.dict())
            return ModerationInputsResult(**result)

        return ModerationInputsResult(flagged=flagged, action=ModerationAction.DIRECT_OUTPUT, preset_response=preset_response)

    def moderation_for_outputs(self, text: str) -> ModerationOutputsResult:
        flagged = False
        preset_response = ""

        if self.config['outputs_config']['enabled']:
            params = ModerationOutputParams(
                app_id=self.app_id,
                text=text
            )

            result = self._get_config_by_requestor(APIBasedExtensionPoint.APP_MODERATION_OUTPUT, params.dict())
            return ModerationOutputsResult(**result)

        return ModerationOutputsResult(flagged=flagged, action=ModerationAction.DIRECT_OUTPUT, preset_response=preset_response)

    def _get_config_by_requestor(self, extension_point: APIBasedExtensionPoint, params: dict) -> dict:
        extension = self._get_api_based_extension(self.tenant_id, self.config.get("api_based_extension_id"))
        requestor = APIBasedExtensionRequestor(extension.api_endpoint, decrypt_token(self.tenant_id, extension.api_key))

        result = requestor.request(extension_point, params)
        return result

    @staticmethod
    def _get_api_based_extension(tenant_id: str, api_based_extension_id: str) -> APIBasedExtension:
        extension = db.session.query(APIBasedExtension).filter(
            APIBasedExtension.tenant_id == tenant_id,
            APIBasedExtension.id == api_based_extension_id
        ).first()

        return extension
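For context, ApiModeration delegates the actual review to an external HTTP service registered as an API-based extension, and expects the response body to deserialize directly into ModerationInputsResult / ModerationOutputsResult. The exact request envelope sent by APIBasedExtensionRequestor is not shown in this diff; the {"point": ..., "params": ...} shape below is an assumption, and the endpoint, port, and banned-word list are placeholders. A minimal sketch of such an external endpoint:

# Hypothetical external moderation endpoint for an API-based extension.
# Assumption: the requestor POSTs JSON like {"point": "...", "params": {...}}
# and expects a dict shaped like ModerationInputsResult/ModerationOutputsResult.
from flask import Flask, jsonify, request

app = Flask(__name__)

BANNED = {"foo", "bar"}  # placeholder keyword list


@app.route("/moderation", methods=["POST"])
def moderate():
    payload = request.get_json(force=True)
    params = payload.get("params", {})
    # Collect whatever text fields arrived: inputs/query for the input
    # point, text for the output point.
    pieces = [str(v) for v in params.get("inputs", {}).values()]
    pieces += [params.get("query", ""), params.get("text", "")]
    text = " ".join(pieces).lower()
    flagged = any(word in text for word in BANNED)
    return jsonify({
        "flagged": flagged,
        "action": "direct_output",
        "preset_response": "Your content violates our policy." if flagged else ""
    })


if __name__ == "__main__":
    app.run(port=8000)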
api/core/moderation/base.py (new file, 113 lines)
@@ -0,0 +1,113 @@
from abc import ABC, abstractmethod
from typing import Optional
from pydantic import BaseModel
from enum import Enum

from core.extension.extensible import Extensible, ExtensionModule


class ModerationAction(Enum):
    DIRECT_OUTPUT = 'direct_output'
    OVERRIDED = 'overrided'


class ModerationInputsResult(BaseModel):
    flagged: bool = False
    action: ModerationAction
    preset_response: str = ""
    inputs: dict = {}
    query: str = ""


class ModerationOutputsResult(BaseModel):
    flagged: bool = False
    action: ModerationAction
    preset_response: str = ""
    text: str = ""


class Moderation(Extensible, ABC):
    """
    The base class of moderation.
    """
    module: ExtensionModule = ExtensionModule.MODERATION

    def __init__(self, app_id: str, tenant_id: str, config: Optional[dict] = None) -> None:
        super().__init__(tenant_id, config)
        self.app_id = app_id

    @classmethod
    @abstractmethod
    def validate_config(cls, tenant_id: str, config: dict) -> None:
        """
        Validate the incoming form config data.

        :param tenant_id: the id of the workspace
        :param config: the form config data
        :return:
        """
        raise NotImplementedError

    @abstractmethod
    def moderation_for_inputs(self, inputs: dict, query: str = "") -> ModerationInputsResult:
        """
        Moderation for inputs.
        After the user submits their inputs, this method is called to review them
        for sensitive content and return the processed result.

        :param inputs: user inputs
        :param query: query string (required in chat apps)
        :return:
        """
        raise NotImplementedError

    @abstractmethod
    def moderation_for_outputs(self, text: str) -> ModerationOutputsResult:
        """
        Moderation for outputs.
        As the LLM produces content, the front end passes the output (possibly segmented)
        to this method for sensitive-content review; the output is shielded if the review fails.

        :param text: LLM output content
        :return:
        """
        raise NotImplementedError

    @classmethod
    def _validate_inputs_and_outputs_config(cls, config: dict, is_preset_response_required: bool) -> None:
        # inputs_config
        inputs_config = config.get("inputs_config")
        if not isinstance(inputs_config, dict):
            raise ValueError("inputs_config must be a dict")

        # outputs_config
        outputs_config = config.get("outputs_config")
        if not isinstance(outputs_config, dict):
            raise ValueError("outputs_config must be a dict")

        inputs_config_enabled = inputs_config.get("enabled")
        outputs_config_enabled = outputs_config.get("enabled")
        if not inputs_config_enabled and not outputs_config_enabled:
            raise ValueError("At least one of inputs_config or outputs_config must be enabled")

        # preset_response
        if not is_preset_response_required:
            return

        if inputs_config_enabled:
            if not inputs_config.get("preset_response"):
                raise ValueError("inputs_config.preset_response is required")

            if len(inputs_config.get("preset_response")) > 100:
                raise ValueError("inputs_config.preset_response must be less than 100 characters")

        if outputs_config_enabled:
            if not outputs_config.get("preset_response"):
                raise ValueError("outputs_config.preset_response is required")

            if len(outputs_config.get("preset_response")) > 100:
                raise ValueError("outputs_config.preset_response must be less than 100 characters")


class ModerationException(Exception):
    pass
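Every moderation type in this commit implements the Moderation contract above: a classmethod validating the form config, plus one hook each for inputs and outputs. A minimal sketch of a custom subclass; the name "noop" and its behavior are illustrative, not part of this commit:

# Hypothetical subclass sketch: a trivial moderation type that flags nothing.
from core.moderation.base import (
    Moderation, ModerationAction, ModerationInputsResult, ModerationOutputsResult
)


class NoopModeration(Moderation):
    name: str = "noop"

    @classmethod
    def validate_config(cls, tenant_id: str, config: dict) -> None:
        # Reuse the shared inputs/outputs validation from the base class.
        cls._validate_inputs_and_outputs_config(config, True)

    def moderation_for_inputs(self, inputs: dict, query: str = "") -> ModerationInputsResult:
        return ModerationInputsResult(flagged=False, action=ModerationAction.DIRECT_OUTPUT)

    def moderation_for_outputs(self, text: str) -> ModerationOutputsResult:
        return ModerationOutputsResult(flagged=False, action=ModerationAction.DIRECT_OUTPUT)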
api/core/moderation/factory.py (new file, 48 lines)
@@ -0,0 +1,48 @@
from core.extension.extensible import ExtensionModule
from core.moderation.base import Moderation, ModerationInputsResult, ModerationOutputsResult
from extensions.ext_code_based_extension import code_based_extension


class ModerationFactory:
    __extension_instance: Moderation

    def __init__(self, name: str, app_id: str, tenant_id: str, config: dict) -> None:
        extension_class = code_based_extension.extension_class(ExtensionModule.MODERATION, name)
        self.__extension_instance = extension_class(app_id, tenant_id, config)

    @classmethod
    def validate_config(cls, name: str, tenant_id: str, config: dict) -> None:
        """
        Validate the incoming form config data.

        :param name: the name of the extension
        :param tenant_id: the id of the workspace
        :param config: the form config data
        :return:
        """
        code_based_extension.validate_form_schema(ExtensionModule.MODERATION, name, config)
        extension_class = code_based_extension.extension_class(ExtensionModule.MODERATION, name)
        extension_class.validate_config(tenant_id, config)

    def moderation_for_inputs(self, inputs: dict, query: str = "") -> ModerationInputsResult:
        """
        Moderation for inputs.
        After the user submits their inputs, this method is called to review them
        for sensitive content and return the processed result.

        :param inputs: user inputs
        :param query: query string (required in chat apps)
        :return:
        """
        return self.__extension_instance.moderation_for_inputs(inputs, query)

    def moderation_for_outputs(self, text: str) -> ModerationOutputsResult:
        """
        Moderation for outputs.
        As the LLM produces content, the front end passes the output (possibly segmented)
        to this method for sensitive-content review; the output is shielded if the review fails.

        :param text: LLM output content
        :return:
        """
        return self.__extension_instance.moderation_for_outputs(text)
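A usage sketch of the factory with the keywords type added later in this commit. It assumes a running Dify API context where code-based extensions are already registered; the IDs and config values are placeholders:

from core.moderation.factory import ModerationFactory

config = {
    "keywords": "badword\nanotherbadword",
    "inputs_config": {"enabled": True, "preset_response": "Sorry, I can't help with that."},
    "outputs_config": {"enabled": False},
}

# Validate the form config first, then moderate some user input.
ModerationFactory.validate_config("keywords", tenant_id="tenant-id-placeholder", config=config)
moderation = ModerationFactory("keywords", app_id="app-id-placeholder",
                               tenant_id="tenant-id-placeholder", config=config)
result = moderation.moderation_for_inputs({"name": "badword"}, query="hello")
print(result.flagged)  # True: "badword" matches an input value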
api/core/moderation/keywords/__builtin__ (new file, 1 line)
@@ -0,0 +1 @@
2
api/core/moderation/keywords/__init__.py (new file, 0 lines)
api/core/moderation/keywords/keywords.py (new file, 60 lines)
@@ -0,0 +1,60 @@
from core.moderation.base import Moderation, ModerationInputsResult, ModerationOutputsResult, ModerationAction


class KeywordsModeration(Moderation):
    name: str = "keywords"

    @classmethod
    def validate_config(cls, tenant_id: str, config: dict) -> None:
        """
        Validate the incoming form config data.

        :param tenant_id: the id of the workspace
        :param config: the form config data
        :return:
        """
        cls._validate_inputs_and_outputs_config(config, True)

        if not config.get("keywords"):
            raise ValueError("keywords is required")

        if len(config.get("keywords")) > 1000:
            raise ValueError("keywords length must be less than 1000")

    def moderation_for_inputs(self, inputs: dict, query: str = "") -> ModerationInputsResult:
        flagged = False
        preset_response = ""

        if self.config['inputs_config']['enabled']:
            preset_response = self.config['inputs_config']['preset_response']

            if query:
                inputs['query__'] = query
            keywords_list = self.config['keywords'].split('\n')
            flagged = self._is_violated(inputs, keywords_list)

        return ModerationInputsResult(flagged=flagged, action=ModerationAction.DIRECT_OUTPUT, preset_response=preset_response)

    def moderation_for_outputs(self, text: str) -> ModerationOutputsResult:
        flagged = False
        preset_response = ""

        if self.config['outputs_config']['enabled']:
            keywords_list = self.config['keywords'].split('\n')
            flagged = self._is_violated({'text': text}, keywords_list)
            preset_response = self.config['outputs_config']['preset_response']

        return ModerationOutputsResult(flagged=flagged, action=ModerationAction.DIRECT_OUTPUT, preset_response=preset_response)

    def _is_violated(self, inputs: dict, keywords_list: list) -> bool:
        for value in inputs.values():
            if self._check_keywords_in_value(keywords_list, value):
                return True

        return False

    def _check_keywords_in_value(self, keywords_list: list, value) -> bool:
        for keyword in keywords_list:
            # str() guards against non-string input values (e.g. numbers).
            if keyword.lower() in str(value).lower():
                return True
        return False
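The matching rule here is case-insensitive substring containment over every input value, with the chat query smuggled in under the reserved 'query__' key. A self-contained illustration of that semantics (the keyword and input values are made up):

# Standalone restatement of the keyword-matching semantics above.
keywords_list = ["Spam", "scam"]
inputs = {"subject": "Totally not SPAM", "query__": "hello"}


def violated(inputs: dict, keywords_list: list) -> bool:
    return any(
        keyword.lower() in str(value).lower()
        for keyword in keywords_list
        for value in inputs.values()
    )


print(violated(inputs, keywords_list))  # True: "spam" is a substring of "totally not spam"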
api/core/moderation/openai_moderation/__builtin__ (new file, 1 line)
@@ -0,0 +1 @@
1
api/core/moderation/openai_moderation/__init__.py (new file, 0 lines)
api/core/moderation/openai_moderation/openai_moderation.py (new file, 46 lines)
@@ -0,0 +1,46 @@
from core.moderation.base import Moderation, ModerationInputsResult, ModerationOutputsResult, ModerationAction
from core.model_providers.model_factory import ModelFactory


class OpenAIModeration(Moderation):
    name: str = "openai_moderation"

    @classmethod
    def validate_config(cls, tenant_id: str, config: dict) -> None:
        """
        Validate the incoming form config data.

        :param tenant_id: the id of the workspace
        :param config: the form config data
        :return:
        """
        cls._validate_inputs_and_outputs_config(config, True)

    def moderation_for_inputs(self, inputs: dict, query: str = "") -> ModerationInputsResult:
        flagged = False
        preset_response = ""

        if self.config['inputs_config']['enabled']:
            preset_response = self.config['inputs_config']['preset_response']

            if query:
                inputs['query__'] = query
            flagged = self._is_violated(inputs)

        return ModerationInputsResult(flagged=flagged, action=ModerationAction.DIRECT_OUTPUT, preset_response=preset_response)

    def moderation_for_outputs(self, text: str) -> ModerationOutputsResult:
        flagged = False
        preset_response = ""

        if self.config['outputs_config']['enabled']:
            flagged = self._is_violated({'text': text})
            preset_response = self.config['outputs_config']['preset_response']

        return ModerationOutputsResult(flagged=flagged, action=ModerationAction.DIRECT_OUTPUT, preset_response=preset_response)

    def _is_violated(self, inputs: dict) -> bool:
        text = '\n'.join(str(value) for value in inputs.values())
        openai_moderation = ModelFactory.get_moderation_model(self.tenant_id, "openai", "moderation")
        # run() returns True when the text passes moderation, so invert it here.
        text_passes = openai_moderation.run(text)
        return not text_passes
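Unlike the keywords type, the OpenAI-backed type needs no extra config keys beyond the shared inputs/outputs blocks. A sketch of a config that validate_config would accept; the values are placeholders:

from core.moderation.openai_moderation.openai_moderation import OpenAIModeration

# Both blocks enabled, so both preset responses are required
# by _validate_inputs_and_outputs_config(config, True).
config = {
    "inputs_config": {"enabled": True, "preset_response": "Input blocked."},
    "outputs_config": {"enabled": True, "preset_response": "Output blocked."},
}
OpenAIModeration.validate_config(tenant_id="tenant-id-placeholder", config=config)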