# RunLLM_ollama.py
from radprompter import Prompt, RadPrompter, vLLMClient, OllamaClient, OpenAIClient
import os
import pandas as pd
import numpy as np
import pandas as pd
# Label embedded in the names of both output files below.
MODEL = 'mixtral8x22b'
# Home-relative path of the input spreadsheet.
INPUT_FILE = '~/Desktop/SIIMCombinedReports.xlsx'
# Home-relative paths of the final results file and the intermediate output.
OUTPUT_FILE = f'~/Desktop/SIIM_Results-{MODEL}.csv'
TEMP_OUT = f'~/Desktop/output-{MODEL}.csv'
if __name__ == '__main__':
    # Script entry point. Steps, in order:
    #   1. build the prompt, the Ollama client and the RadPrompter engine;
    #   2. load the report spreadsheet and clean 'Findings' / 'Report';
    #   3. print, per 'ExamClass' category, the count and percentage of each
    #      distinct 'Findings' value;
    #   4. run the engine over the non-empty reports;
    #   5. read the engine output back, drop the report text, join the
    #      'Findings' column and write the result to OUTPUT_FILE.

    # os.path.exists / os.remove do not expand '~', so the home-relative
    # constants are resolved once here and the resolved paths used throughout.
    input_path = os.path.expanduser(INPUT_FILE)
    output_path = os.path.expanduser(OUTPUT_FILE)
    temp_path = os.path.expanduser(TEMP_OUT)

    prompt = Prompt("SIIM.toml")
    # NOTE(review): the client is hard-coded to "llama3" while the output file
    # names are labelled with MODEL — confirm which model is intended.
    client = OllamaClient(
        model="llama3",
        base_url="http://localhost:11434/v1",
        temperature=0.0,
        seed=42,
    )

    # Delete any prior engine output so results from an earlier run cannot be
    # picked up by the read further down.
    if os.path.exists(temp_path):
        os.remove(temp_path)

    engine = RadPrompter(
        client=client,
        prompt=prompt,
        output_file=temp_path,
    )

    # Load the Excel file into a DataFrame.
    reports_df = pd.read_excel(input_path)

    # Strip spaces out of the 'Findings' column.
    reports_df['Findings'] = reports_df['Findings'].str.replace(
        ' ', '', regex=False
    )
    # Remove newlines, the '_0x000D_' marker and spaces from the report text.
    # NOTE(review): this removes *every* space from each report, and the
    # original applied the identical space replacement twice — possibly a
    # double-space collapse that was mangled; confirm the intended cleanup.
    for token in ('\n', '_0x000D_', ' '):
        reports_df['Report'] = reports_df['Report'].str.replace(
            token, '', regex=False
        )
    # Missing cells and the literal string 'None' are both normalised to 'No'.
    reports_df = reports_df.replace({np.nan: 'No', 'None': 'No'})

    #---
    # Split reports_df into one DataFrame per distinct 'ExamClass' value.
    categories = reports_df['ExamClass'].unique()
    print (categories)
    dfs = {
        category: reports_df[reports_df['ExamClass'] == category]
        for category in categories
    }

    #---
    # For each category, collect the distinct 'Findings' values, how often
    # each occurs, and the total number of rows.
    category_summary = {}
    for category, df in dfs.items():
        category_summary[category] = {
            'Unique_Values': df['Findings'].unique(),
            'Unique_Counts': df['Findings'].value_counts(),
            'Total': len(df),
        }

    # Print count and integer percentage of each 'Findings' value per category.
    for category, counts in category_summary.items():
        total = counts['Total']
        print(f"Category: {category}")
        for value, count in counts['Unique_Counts'].items():
            print(f"{value}: {count} {count * 100 // total}%")
        print(f"Total: {total}")
        print()

    #---
    # Build the engine input: one dict per report whose stripped text is
    # non-empty, carrying the report's 'ExamClass' under the key 'filename'.
    reports = [
        {'report': report.strip(), 'filename': category}
        for report, category in zip(reports_df['Report'], reports_df['ExamClass'])
        if report.strip()
    ]

    #---
    print ('Doing inference...')
    # The inference call had been commented out; with the prior output removed
    # above, the read below would then have no file to load.
    engine(reports)

    #---
    output_df = pd.read_csv(temp_path, index_col='index')
    # Rename the column in output_df from 'filename' to 'ExamClass'.
    out_df = output_df.rename(columns={'filename': 'ExamClass'})
    # Delete the column holding the report text.
    out_df = out_df.drop(columns=['report'])
    # Merge the 'Findings' column from reports_df into the output by index.
    # NOTE(review): this presumes the output 'index' values line up with the
    # row index of reports_df; if any empty report was filtered out of
    # `reports` above, the two may be offset — verify.
    out_df = out_df.join(reports_df['Findings'])

    #---
    if os.path.exists(output_path):
        os.remove(output_path)
    # Write the combined DataFrame to a CSV file.
    out_df.to_csv(output_path)
    print('Examine the output file to assure no reports or other PHI. Please send this file back to BJE@mayo.edu')
##########################################################################
# This file was converted using nb2py: https://github.com/BardiaKh/nb2py #
##########################################################################