-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutil.py
More file actions
176 lines (154 loc) · 6.74 KB
/
util.py
File metadata and controls
176 lines (154 loc) · 6.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#%%
import os
import getpass
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gc
import psutil
import torch.nn.functional as F
import random
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
from typing import List
from accelerate import load_checkpoint_and_dispatch, init_empty_weights
# Pick the first scratch location that exists and in which a per-user
# directory can be created. CACHE_DIR stays None when no location is usable,
# so importing this module never fails just because no scratch disk is mounted.
CACHE_DIR = None
for cache_dir_loc in ['/scr/scr-with-most-space', '/scr-ssd', '/scr']:
    if not os.path.exists(cache_dir_loc):
        continue
    candidate_dir = cache_dir_loc + '/' + getpass.getuser()
    try:
        os.makedirs(candidate_dir, exist_ok=True)
    except OSError:
        # Could not create the directory here (e.g. permissions, read-only
        # or full disk); try the next candidate instead.
        continue
    # Only bind CACHE_DIR once the directory is known to exist.
    CACHE_DIR = candidate_dir
    break
print(f'Using cache dir {CACHE_DIR}')
def get_base_model(base_model, base_model_state_dict = None):
    """Load a causal language model, optionally overwriting its weights.

    Args:
        base_model: Identifier forwarded to
            ``AutoModelForCausalLM.from_pretrained``.
        base_model_state_dict: Optional object forwarded to ``torch.load``
            (presumably a checkpoint path -- confirm against callers). When
            given, the loaded state dict replaces the model's weights.

    Returns:
        The model, loaded with ``device_map='auto'`` and ``CACHE_DIR`` as the
        download cache.
    """
    lm = AutoModelForCausalLM.from_pretrained(base_model, cache_dir=CACHE_DIR, device_map='auto')
    if base_model_state_dict is None:
        return lm

    print('Loading base model state dict...')
    # Tensors are mapped onto the device the model reports as its own.
    checkpoint_weights = torch.load(base_model_state_dict, map_location=lm.device)
    lm.load_state_dict(checkpoint_weights)
    return lm
# def get_base_model(base_model, base_model_state_dict = None):
# if base_model_state_dict is not None:
# with init_empty_weights():
# model = AutoModelForCausalLM.from_pretrained(base_model, cache_dir = CACHE_DIR)
# model = load_checkpoint_and_dispatch(model, base_model_state_dict, device_map='auto')
# else:
# model = AutoModelForCausalLM.from_pretrained(base_model, cache_dir = CACHE_DIR, device_map = 'auto')
# return model
def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and, if present, CUDA).

    Args:
        seed: Seed value; it is converted with ``int`` before being handed to
            NumPy and passed unchanged to the other generators.
    """
    seeders = [
        random.seed,
        lambda value: np.random.seed(int(value)),
        torch.manual_seed,
    ]
    if torch.cuda.is_available():
        seeders.append(torch.cuda.manual_seed_all)
    # Apply in a fixed order: random, numpy, torch, then CUDA devices.
    for apply_seed in seeders:
        apply_seed(seed)
def weighted_lm_loss(model, input_ids, target_ids, attention_mask, weights):
    """Compute a per-token-weighted next-token cross-entropy loss.

    The model is called with ``input_ids``, ``attention_mask`` and
    ``labels=target_ids``; only the returned ``logits`` are used here.

    Args:
        model: Callable returning an object with a ``logits`` attribute of
            shape (batch_size, seq_len, vocab_size).
        input_ids: Token ids, first dimension is the batch.
        target_ids: Labels indexed as (batch, position); entries equal to
            -100 contribute zero loss.
        attention_mask: Mask indexed as (batch, position).
        weights: Per-token weights indexed as (batch, position).

    Returns:
        Scalar tensor: sum of ``loss * weight * mask`` over positions 1..end,
        divided by the sum of the mask over those same positions.
    """
    logits = model(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids).logits
    n_sequences = len(input_ids)
    vocab_size = logits.shape[-1]

    # Align predictions with targets: the last logit has no label and the
    # first label has no logit, so drop one from each end before flattening.
    flat_logits = logits[:, :-1, :].reshape(-1, vocab_size)  # (batch_size*(seq_len-1), vocab_size)
    flat_labels = target_ids[:, 1:].reshape(-1)  # (batch_size*(seq_len-1))

    per_token_loss = F.cross_entropy(flat_logits, flat_labels, ignore_index=-100, reduction='none')
    per_token_loss = per_token_loss.reshape(n_sequences, -1)

    shifted_mask = attention_mask[:, 1:]
    weighted_total = (per_token_loss * weights[:, 1:] * shifted_mask).sum()
    return weighted_total / shifted_mask.sum()
def create_colored_text(words: List[str], data: List[float], font_path='DejaVuSansMono.ttf', pos_cmap=None, neg_cmap=None, max_intensity=.8) -> 'Image.Image':
    """Render words on a white canvas, each on a background colored by its value.

    Words are laid out left to right with no extra spacing and wrap to a new
    line when the next word would cross the 800-pixel canvas width. Positive
    values are colored with ``pos_cmap`` and all other values with
    ``neg_cmap``; the colormap input is the value's magnitude scaled so that
    the largest magnitude in ``data`` maps to ``max_intensity``.

    Args:
        words: Strings to draw, in order.
        data: One numeric value per word; ``data[i]`` colors ``words[i]``.
        font_path: Font file handed to ``ImageFont.truetype`` (size 16).
        pos_cmap: Callable mapping a float to an RGB(A) tuple of floats in
            [0, 1]; defaults to Matplotlib's 'Reds'.
        neg_cmap: Same as ``pos_cmap`` for non-positive values; defaults to
            Matplotlib's 'Blues'.
        max_intensity: Colormap input used for the largest magnitude.

    Returns:
        An RGB PIL image 800 pixels wide and 25 pixels tall per text line.
        With no words, a single blank line is returned.
    """
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in
    # Matplotlib 3.9.
    if pos_cmap is None:
        pos_cmap = plt.get_cmap('Reds')
    if neg_cmap is None:
        neg_cmap = plt.get_cmap('Blues')

    max_width = 800
    line_height = 25
    font = ImageFont.truetype(font_path, 16)

    if len(words) == 0:
        # Nothing to lay out: return one empty line instead of failing on
        # max()/np.min() of an empty sequence.
        return Image.new('RGB', (max_width, line_height), (255, 255, 255))

    max_mag = max(abs(np.min(data)), np.max(data))

    def color_for(value):
        # All-zero data would divide by zero; treat every value as zero
        # intensity, matching how a zero is colored in the general case.
        if max_mag == 0:
            return neg_cmap(0.0)
        if value > 0:
            return pos_cmap(value * max_intensity / max_mag)
        return neg_cmap(-value * max_intensity / max_mag)

    # Height of the background box: the largest bottom edge among the words'
    # bounding boxes.
    max_font_size = max(font.getbbox(word)[3] for word in words)

    # Single layout pass: record where each word goes so the canvas height is
    # known before drawing, without repeating the wrapping logic.
    placements = []
    x = 0
    y = 0
    for word in words:
        word_width = font.getlength(word)
        if x + word_width > max_width:
            # Word does not fit on the current line; wrap.
            x = 0
            y += line_height
        placements.append((x, y, word_width))
        x += word_width
    final_height = y + line_height

    image = Image.new('RGB', (max_width, final_height), (255, 255, 255))
    draw = ImageDraw.Draw(image)
    for i, word in enumerate(words):
        x, y, word_width = placements[i]
        # Colormaps return floats in [0, 1]; convert RGB to 8-bit and drop
        # any alpha channel.
        rgb_color = tuple(int(c * 255) for c in color_for(data[i])[:3])
        draw.rectangle([(x, y), (x + word_width, y + max_font_size)], fill=rgb_color)
        draw.text((x, y), word, font=font, fill=(0, 0, 0))
    # The canvas was created at its final size, so no crop/resize is needed.
    return image
# %%
def shuffle_groups(df, group_col):
    """
    Shuffles the order of groups in a Pandas DataFrame without shuffling the order of items within each group.

    The input DataFrame is left untouched; no helper column is added to it.
    Randomness comes from NumPy's global generator (``np.random.shuffle``).

    Parameters:
    - df: the input DataFrame
    - group_col: the name of the column containing the groups to be shuffled
      (group values must be hashable)

    Returns:
    - a shuffled copy of the input DataFrame with a fresh 0..n-1 index, in
      which all rows of a group are contiguous and keep their relative order
    """
    # Unique groups in order of first appearance, then shuffled in place.
    groups = df[group_col].unique()
    np.random.shuffle(groups)

    # Position of every group in the shuffled order; a dict makes the per-row
    # lookup O(1) instead of scanning the group array for each row.
    rank_of_group = {group: rank for rank, group in enumerate(groups)}
    row_ranks = df[group_col].map(rank_of_group).to_numpy()

    # A stable sort keeps the original order of rows within each group.
    row_order = np.argsort(row_ranks, kind='stable')
    return df.iloc[row_order].reset_index(drop=True)
def return_k_unique(df, k, column):
    """Keep only the rows whose ``column`` value is among its first k unique values.

    Args:
        df: Input pandas DataFrame.
        k: Number of distinct values of ``column`` to keep, taken in order of
            first appearance.
        column: Name of the column to inspect.

    Returns:
        ``df`` itself when it has at most ``k`` distinct values in ``column``;
        otherwise a filtered DataFrame with the original row order and index.
    """
    unique_values = df[column].unique()
    if k >= len(unique_values):
        return df
    # Vectorized membership test; unlike a per-row ``in`` check it also
    # matches NaN when NaN is one of the kept values.
    return df[df[column].isin(unique_values[:k])]
# %%
def debug_memory():
    """Print CPU RAM, GPU memory and live-tensor statistics for debugging.

    Prints the available and total system RAM, then (when CUDA is available)
    the memory currently allocated by PyTorch next to the total memory of
    device 0, and finally the number of objects tracked by the garbage
    collector that are tensors or expose a tensor as ``.data``.
    """
    gib = 1024 ** 3

    # For CPU RAM: take one snapshot so both numbers describe the same moment.
    virtual_mem = psutil.virtual_memory()
    cpu_ram_available_gb = virtual_mem.available / gib
    cpu_ram_total_gb = virtual_mem.total / gib
    print(f'CPU RAM free: {cpu_ram_available_gb:.2f} GB out of {cpu_ram_total_gb:.2f} GB')

    if torch.cuda.is_available():
        gpu_ram_allocated_gb = torch.cuda.memory_allocated() / gib
        gpu_ram_total_gb = torch.cuda.get_device_properties(0).total_memory / gib
        print(f'GPU RAM actively used: {gpu_ram_allocated_gb:.2f} GB out of {gpu_ram_total_gb:.2f} GB')
    else:
        print('No GPU available.')

    tensor_count = 0
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
                tensor_count += 1
        except Exception:
            # Attribute access on arbitrary tracked objects can raise; skip
            # those objects but let KeyboardInterrupt/SystemExit through.
            pass
    print('Number of tensors: ', tensor_count)