
Commit c0be115

amumu96 and wuzhaoxin authored
FEAT: support qwenvl2 vllm engine (#2428)
Co-authored-by: wuzhaoxin <15667065080@162.com>
1 parent b295f9a commit c0be115

18 files changed: +167 −67 lines

xinference/model/llm/llm_family.json

Lines changed: 9 additions & 12 deletions
```diff
@@ -6909,18 +6909,15 @@
         "model_id": "Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
       }
     ],
-    "prompt_style": {
-      "style_name": "QWEN",
-      "system_prompt": "You are a helpful assistant",
-      "roles": [
-        "user",
-        "assistant"
-      ],
-      "stop": [
-        "<|im_end|>",
-        "<|endoftext|>"
-      ]
-    }
+    "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
   },
   {
     "version": 1,
```

xinference/model/llm/llm_family_modelscope.json

Lines changed: 9 additions & 8 deletions
```diff
@@ -4627,14 +4627,15 @@
         "model_hub": "modelscope"
       }
     ],
-    "prompt_style": {
-      "style_name": "QWEN",
-      "system_prompt": "You are a helpful assistant",
-      "roles": [
-        "user",
-        "assistant"
-      ]
-    }
+    "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
   },
   {
     "version": 1,
```

xinference/model/llm/transformers/cogvlm2.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -29,7 +29,7 @@
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
-from .utils import get_max_src_len
+from .utils import cache_clean, get_max_src_len
 
 logger = logging.getLogger(__name__)
 
@@ -176,6 +176,7 @@ def get_query_and_history(
             query = content
         return query, image, history
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
```
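Each transformers multimodal model in this commit gains the same `@cache_clean` decorator on its `chat` method (the hunks in the files below are identical in shape). The decorator's implementation lives in `xinference/model/llm/transformers/utils.py` and is not shown in this diff; a plausible sketch of what such a decorator could do, with the behavior inferred from the name rather than confirmed by the commit:

```python
# Hypothetical sketch only; the real cache_clean is defined in
# xinference/model/llm/transformers/utils.py and may differ.
import functools
import gc

import torch


def cache_clean(fn):
    """Free leftover accelerator cache after each chat call."""

    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        result = fn(self, *args, **kwargs)
        gc.collect()  # drop Python-side references left by preprocessing
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # return cached CUDA blocks to the driver
        return result

    return wrapper
```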

xinference/model/llm/transformers/cogvlm2_video.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -28,6 +28,7 @@
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -227,6 +228,7 @@ def get_query_and_history(
 
         return query, image, video, history
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
```

xinference/model/llm/transformers/deepseek_vl.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -28,6 +28,7 @@
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -137,6 +138,7 @@ def _fill_placeholder(_url, _index):
             return "".join(new_content), images
         return content, []
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
```

xinference/model/llm/transformers/glm4v.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -26,7 +26,7 @@
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import _decode_image, generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
-from .utils import get_max_src_len
+from .utils import cache_clean, get_max_src_len
 
 logger = logging.getLogger(__name__)
 
@@ -129,6 +129,7 @@ def _get_processed_msgs(messages: List[Dict]) -> List[Dict]:
             res.append({"role": role, "content": text})
         return res
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
```

xinference/model/llm/transformers/intern_vl.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -27,6 +27,7 @@
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -326,6 +327,7 @@ def load(self, **kwargs):
             use_fast=False,
         )
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
```

xinference/model/llm/transformers/minicpmv25.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -29,6 +29,7 @@
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -119,6 +120,7 @@ def _message_content_to_chat(self, content):
             raise RuntimeError("Only one image per message is supported")
         return content, []
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
```

xinference/model/llm/transformers/minicpmv26.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -30,6 +30,7 @@
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -198,6 +199,7 @@ def _convert_to_specific_style(self, messages: List[Dict]) -> Tuple:
             msgs.append({"role": "user", "content": images_chat + [content]})
         return msgs, video_existed
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
```

xinference/model/llm/transformers/omnilmm.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -24,6 +24,7 @@
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, parse_messages
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -87,6 +88,7 @@ def _ensure_url(_url):
             return images, other_content
         return [], [{"type": "text", "text": content}]
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
```
