-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample_instruction_training.py
More file actions
109 lines (89 loc) · 2.91 KB
/
example_instruction_training.py
File metadata and controls
109 lines (89 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
Example script showing how to use instruction-based training with LLMCustoms.
"""
from llmcustoms.First_Wrapper.first_wrpr import FineTuner
# ---------------------------------------------------------------
# Example 1: Train with the Dolly-15k dataset from HuggingFace.
# Demonstrates fully automatic, hardware-aware model and preset
# selection combined with instruction-style fine-tuning.
# ---------------------------------------------------------------
print("=" * 60)
print("Example 1: Training with Dolly-15k from HuggingFace")
print("=" * 60)
tuner = FineTuner(
    data_path=None,                                  # not using a local file
    dataset_name="databricks/databricks-dolly-15k",  # HuggingFace dataset id
    model="auto",                 # auto-select best model for your hardware
    preset="auto",                # auto-select best preset
    training_mode="instruction",  # instruction-based training
    prompt_template="alpaca",     # use the Alpaca prompt format
    mask_instruction=True         # train (compute loss) only on responses
)
# Uncomment to actually start training (downloads data and trains a model):
# model_path = tuner.train()
# print(f"\nModel saved at: {model_path}")
# ---------------------------------------------------------------
# Example 2: Train with a local CSV file.
# The CSV is expected to have the columns: instruction, response,
# and (optionally) context.
# ---------------------------------------------------------------
print("\n" + "=" * 60)
print("Example 2: Training with local CSV file")
print("=" * 60)
tuner2 = FineTuner(
    data_path="./my_instructions.csv",
    model="phi",                  # a specific model instead of "auto"
    preset="balanced",            # a specific preset instead of "auto"
    training_mode="instruction",
    prompt_template="simple"      # use the simple prompt format
)
# Uncomment to actually start training:
# model_path2 = tuner2.train()
# print(f"\nModel saved at: {model_path2}")
# ---------------------------------------------------------------
# Example 3: Custom field mapping.
# Shows how to remap dataset column names when your data does not
# use the default instruction/response/context field names.
# ---------------------------------------------------------------
print("\n" + "=" * 60)
print("Example 3: Custom field mapping")
print("=" * 60)
tuner3 = FineTuner(
    data_path="./custom_data.json",
    model="gemma",
    preset="highspeed",
    training_mode="instruction"
)
# Remap field names for datasets whose columns differ from the defaults.
tuner3.data_handler.set_field_mapping(
    instruction_field="question",  # dataset uses "question" for "instruction"
    response_field="answer",       # dataset uses "answer" for "response"
    context_field="background"     # dataset uses "background" for "context"
)
# Uncomment to actually start training:
# model_path3 = tuner3.train()
# print(f"\nModel saved at: {model_path3}")
# ---------------------------------------------------------------
# Example 4: Selecting different prompt templates.
# Each FineTuner below is configured identically except for the
# prompt_template argument; none of them is trained here.
# ---------------------------------------------------------------
print("\n" + "=" * 60)
print("Example 4: Using different prompt templates")
print("=" * 60)

# Alpaca template (the default format).
tuner_alpaca = FineTuner(
    dataset_name="databricks/databricks-dolly-15k",
    model="auto",
    preset="auto",
    prompt_template="alpaca"
)

# Simple template.
tuner_simple = FineTuner(
    dataset_name="databricks/databricks-dolly-15k",
    model="auto",
    preset="auto",
    prompt_template="simple"
)

# Vicuna template.
tuner_vicuna = FineTuner(
    dataset_name="databricks/databricks-dolly-15k",
    model="auto",
    preset="auto",
    prompt_template="vicuna"
)

# A user-supplied template string. The {instruction} and {context}
# placeholders are filled in from each dataset record.
# NOTE(review): presumably the response text is appended after
# "Answer: " during training — confirm against FineTuner's template docs.
custom_template = """Question: {instruction}
Context: {context}
Answer: """
tuner_custom = FineTuner(
    dataset_name="databricks/databricks-dolly-15k",
    model="auto",
    preset="auto",
    prompt_template=custom_template
)

print("\nAll examples configured successfully!")
print("Uncomment the train() calls to actually train the models.")