Video-Text-to-Text
Transformers
Safetensors
English
qwen2_5_vl
image-text-to-text
multimodal
text-generation-inference
Instructions to use OpenGVLab/VideoChat-R1_5-7B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use OpenGVLab/VideoChat-R1_5-7B with Transformers:
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("OpenGVLab/VideoChat-R1_5-7B")
model = AutoModelForImageTextToText.from_pretrained("OpenGVLab/VideoChat-R1_5-7B")
```
- Notebooks
- Google Colab
- Kaggle
Update README.md
Browse files
README.md
CHANGED
|
@@ -97,13 +97,13 @@ for percption in range(num_percptions):
|
|
| 97 |
example_prompt = QA_THINK_GLUE.replace("[QUESTION]", item["problem"]["question"])
|
| 98 |
|
| 99 |
|
| 100 |
-
ans = inference(video_path,
|
| 101 |
|
| 102 |
pattern_glue = r'<glue>(.*?)</glue>'
|
| 103 |
match_glue = re.search(pattern_glue, ans, re.DOTALL)
|
| 104 |
# print(f'ann:{ans}')
|
| 105 |
answers.append(ans)
|
| 106 |
-
|
| 107 |
try:
|
| 108 |
if match_glue:
|
| 109 |
glue = match_glue.group(1)
|
|
|
|
| 97 |
example_prompt = QA_THINK_GLUE.replace("[QUESTION]", item["problem"]["question"])
|
| 98 |
|
| 99 |
|
| 100 |
+
ans = inference(video_path, example_prompt, model, processor, device=device, client=client, pred_glue=pred_glue)
|
| 101 |
|
| 102 |
pattern_glue = r'<glue>(.*?)</glue>'
|
| 103 |
match_glue = re.search(pattern_glue, ans, re.DOTALL)
|
| 104 |
# print(f'ann:{ans}')
|
| 105 |
answers.append(ans)
|
| 106 |
+
pred_glue = None
|
| 107 |
try:
|
| 108 |
if match_glue:
|
| 109 |
glue = match_glue.group(1)
|