-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathtest_qwen_model.py
More file actions
83 lines (68 loc) · 2.44 KB
/
test_qwen_model.py
File metadata and controls
83 lines (68 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""
===============================================================================
Script for Generating Responses using Fine-tuned Qwen Model
===============================================================================
Author:
-------
houalex@gmail.com
Date:
-----
Feb 14, 2025
Description:
-------------
This script loads a fine-tuned Qwen model, which was trained on specific datasets,
to generate natural language responses to user prompts. The model is used to generate
text based on a provided question or statement. The generated response is printed
to the console.
Dependencies:
-------------
- transformers (for loading and using the Qwen model)
- torch (for model inference on GPU/CPU)
- transformers.pipeline (text-generation helper; part of transformers, no separate install)
Usage:
------
1. Install the required dependencies:
$ pip install transformers torch accelerate   # accelerate is needed for device_map="auto"
2. Run the script to:
- Load the fine-tuned Qwen model
- Generate responses based on the provided prompt
- Print the generated response
License:
--------
Apache License 2.0 (Apache-2.0)
===============================================================================
"""
# -*- coding: utf-8 -*-
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
# Load the fine-tuned model and its tokenizer.
# The checkpoint directory is named once so that the model and the tokenizer
# are always read from the same location.
MODEL_PATH = "./model/deepseek-distill-qwen2.5-1.5b"

# Use fp16 only when a CUDA device is available. On a CPU-only host
# device_map="auto" places the model on the CPU, where half precision is slow
# and, on older PyTorch builds, fails with unimplemented 'Half' kernels, so
# fall back to fp32 there.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=model_dtype,
    device_map="auto",  # Let transformers/accelerate decide device placement
)

# Load the tokenizer from the same checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Make the embedding matrix size equal to the tokenizer's vocabulary size.
# NOTE(review): this also shrinks the matrix when the checkpoint has more rows
# than the tokenizer has tokens - confirm that is intended for this checkpoint.
model.resize_token_embeddings(len(tokenizer))
# Wrap the already-loaded model and tokenizer in a text-generation pipeline
# so a prompt string can be passed in directly.
chat_pipeline = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
# Prompt sent to the model: one user turn followed by an open assistant turn.
# NOTE(review): the <|user|>/<|end|>/<|assistant|> tags are presumably the
# format used during fine-tuning - confirm against the training data.
prompt = """<|user|>
What do you think about Trump?
<|end|>
<|assistant|>
"""

# Generation options, gathered in one place so they are easy to tweak.
generation_settings = {
    "max_new_tokens": 500,  # Cap the number of newly generated tokens
    "temperature": 0.7,  # Sampling randomness (higher is more random)
    "do_sample": True,  # Sample instead of greedy decoding
    "eos_token_id": tokenizer.eos_token_id,  # Stop at the tokenizer's EOS token
}
output = chat_pipeline(prompt, **generation_settings)

# The pipeline result is indexed as a sequence; show the text of its first entry.
first_candidate = output[0]
print(first_candidate["generated_text"])