diff --git a/back_end/ai_risk_justifications.csv b/back_end/ai_risk_justifications.csv new file mode 100644 index 0000000..9d230df --- /dev/null +++ b/back_end/ai_risk_justifications.csv @@ -0,0 +1,16 @@ +Risk Type,Risk Description,Low,Low / Medium,Medium,Medium / High,High +Data Privacy,Storage of PII or other sensitive information,Public data only,"Contains non-sensitive personal information +(Ex: user authentication / login)","Contains non-sensitive personal or confidential information +(Ex: collecting chat logs)","Contains regulated data types, all anonymized","Contains regulated data types, NOT all anonymized" +Intellectual Property / Copyright,Violation of IP / Copyright policy,Public domain/licensed data,"Mostly licensed, minor infringement risk",Mixed licensed / unlicensed content,Significant unclear / infringing content,"Highly likely to infringe, unlicensed content" +Misinformation,Hallucination and / or providing inaccurate information,"Highly reliable, factual",Minor inaccuracies in rare cases,"Noticeable inaccuracies, needs verification","Frequent inaccuracies, needs oversight","Highly prone to hallucination, unreliable" +"Dangerous, Violent, or Hateful Content","Creation of any dangerous, violent, or hateful content",Strong safeguards prevent harm,"Harmful content unlikely, edge cases",Moderate effort to generate harmful content,Minimal prompting for harmful content,Proactive harmful content or easily manipulated +Bias,"Any harmful bias, related to model, architecture, data ingestion, etc.",No apparent bias,Bias unrelated to use case,"Bias present, mitigated in planned usage",Bias impacts most planned usage,Bias impacts all usage +Information Leak,Leak of any information trained on and / or retained by the system,No sensitive data,"Minimal sensitive data, sanitized","Potential sensitive data, unlikely leakage","Sensitive data, relative leakage risk",Highly likely to leak sensitive info +Prompt Attack,Susceptibility to adverse prompt attack and / or 
prompt injections,Highly robust,"Generally robust, known vulnerabilities",Vulnerable to some prompt injections,Susceptible to attacks,Easily compromised via prompt injection +Decision Making,Autonomy level of the AI within the system,Info/recommendations only,"Automated tasks, strict boundaries","Moderate autonomy, safeguards","Significant autonomy, some oversight","Complete autonomy, no human intervention" +New Technology,Novelty of technology or tool being implemented,Well-established tech and infrastructure,"Relatively new, growing support","New tech, limited support","Very new / experimental, unstable","Bleeding-edge, unproven, high failure risk" +Model Drift,Upkeep after initial deployment to retain and / or improve performance,"Highly stable, minimal maintenance","Slow degradation, infrequent retraining","Moderate degradation, regular retraining","Rapid degradation, frequent retraining","Highly unstable, constant retraining needed" +Data Maintenance,Requirements to update / refresh source data,Static/rarely changing data,Occasional updates (yearly),"Regular updates (monthly), moderate complexity","Frequent updates (weekly / daily), complex","Constant, dynamic updates, highly complex" +Reputation Risk,Damage to public trust and perception of the state agency due to AI performance or outputs,"Limited and defined scope of use case without free form interaction, small portion of public expected to interact",Limited and defined scope of use case without free-form interaction,"Use case has free-form interaction, small portion of public expected to interact","Use case has free-form interaction, delivering non-critical information",Use case has free-form interaction and will deliver critical information +Discriminatory Public Impact,Insufficient disclosure of AI use or inability to explain AI-driven public-facing decisions,"Explainability of all decisions documented, automatically presented in outputs","Explainability of all decisions documented, available upon 
request","Some decisions cannot be explained, no impact to vulnerable members of population","Most decisions cannot be explained, no impact to vulnerable members of population","Most decisions cannot be explained, expected to impact vulnerable members of population" diff --git a/back_end/ai_risk_justifications.xlsx b/back_end/ai_risk_justifications.xlsx deleted file mode 100644 index 051c5f1..0000000 Binary files a/back_end/ai_risk_justifications.xlsx and /dev/null differ diff --git a/back_end/gemini_agents.py b/back_end/gemini_agents.py index 572de46..35f5b5f 100644 --- a/back_end/gemini_agents.py +++ b/back_end/gemini_agents.py @@ -24,8 +24,8 @@ from back_end.gemini_authentication import initialize_gemini_client # Import Google Generative AI libraries -from google import genai -from google.genai.types import Tool, GenerateContentConfig, UrlContext +import google.generativeai as genai +from google.generativeai.types import GenerationConfig # Constants for agent configuration TEMPERATURE = 0.2 @@ -60,15 +60,14 @@ def extract_response_text(response): return "" # Create a wrapper function to handle all Gemini API calls -def execute_gemini_request(client, model, contents, config, agent_name="Gemini API", retries=3, delay=2, backoff_factor=5): +def execute_gemini_request(model, contents, config, agent_name="Gemini API", retries=3, delay=2, backoff_factor=5): """ Comprehensive wrapper function for Gemini API calls with robust error handling. - Provides retry logic with exponential backoff, checks for blocked prompts, + Provides retry logic with exponential backoff, checks for blocked prompts, validates response content, and handles various error conditions. 
Args: - client: The Gemini API client - model: The model ID to use + model: The Gemini generative model contents: The content to send to the API config: The API configuration agent_name: Name of the agent for display purposes @@ -82,10 +81,9 @@ def execute_gemini_request(client, model, contents, config, agent_name="Gemini A for attempt in range(retries): try: # Make the API call - response = client.models.generate_content( - model=model, + response = model.generate_content( contents=contents, - config=config + generation_config=config ) # Check for immediate prompt blocking @@ -160,7 +158,7 @@ def execute_gemini_request(client, model, contents, config, agent_name="Gemini A return False, "Maximum retries exceeded.", True # Function to get response from Gemini Model API -def get_gemini_response(client, user_message, chat_history=None): +def get_gemini_response(model, user_message, chat_history=None): """Get response from Gemini Model API""" # Prepare the conversation contents = [] @@ -175,13 +173,11 @@ def get_gemini_response(client, user_message, chat_history=None): try: # Generate content with Gemini - response = client.models.generate_content( - model=MODEL_ID, + response = model.generate_content( contents=contents, - config=GenerateContentConfig( + generation_config=GenerationConfig( temperature=TEMPERATURE, max_output_tokens=MAX_OUTPUT_TOKENS, - response_modalities=["TEXT"], ) ) @@ -191,7 +187,7 @@ def get_gemini_response(client, user_message, chat_history=None): return f"Sorry, I encountered an error: {str(e)}" # URL Agent using Gemini -def url_agent(client, urls, project_details, risk_matrix_csv, user_question): +def url_agent(model, urls, project_details, risk_matrix_csv, user_question): """Researches information from URLs in the context of project details""" # Construct the system prompt @@ -218,21 +214,15 @@ def url_agent(client, urls, project_details, risk_matrix_csv, user_question): ) try: - # Create URL context tool - url_context_tool = 
Tool(url_context=UrlContext()) - - # Prepare the API config with URL tool - config = GenerateContentConfig( + # Prepare the API config + config = GenerationConfig( temperature=TEMPERATURE, max_output_tokens=MAX_OUTPUT_TOKENS, - tools=[url_context_tool], - response_modalities=["TEXT"], ) # Make the API call using our wrapper success, response_or_error, is_error = execute_gemini_request( - client=client, - model=MODEL_ID, + model=model, contents=system_prompt, config=config, agent_name="URL Research Agent" @@ -242,28 +232,23 @@ def url_agent(client, urls, project_details, risk_matrix_csv, user_question): if not success: # Log the error and return a structured error response st.error(f"Error in URL Agent: {response_or_error}") - return f"Error in URL research: {response_or_error}", None, None + return f"Error in URL research: {response_or_error}", None # Process the successful response response = response_or_error - # Check for URL metadata in the response - url_metadata = None - if hasattr(response, 'candidates') and response.candidates and hasattr(response.candidates[0], 'url_context_metadata'): - url_metadata = response.candidates[0].url_context_metadata - # Extract the response text response_text = extract_response_text(response) # Just return the response text directly - return response_text.strip() if response_text else "No response was generated from URL analysis.", None, url_metadata + return response_text.strip() if response_text else "No response was generated from URL analysis.", None except Exception as e: st.error(f"Error in URL Agent: {str(e)}") - return f"Error in URL research: {str(e)}", None, None + return f"Error in URL research: {str(e)}", None # Compiling Agent using Gemini -def compiling_agent(client, pdf_agent_result, url_agent_result, url_metadata, project_details, risk_matrix_csv, user_question): +def compiling_agent(model, pdf_agent_result, url_agent_result, project_details, risk_matrix_csv, user_question): """Compiles results from PDF and URL 
agents to provide a comprehensive answer""" # Construct the system prompt @@ -288,8 +273,6 @@ def compiling_agent(client, pdf_agent_result, url_agent_result, url_metadata, pr # Add URL agent results if available if url_agent_result: system_prompt += f"**URL Research Results:**\n{url_agent_result}\n\n" - if url_metadata: - system_prompt += f"**URL Metadata:**\n{str(url_metadata)}\n\n" else: system_prompt += "**URL Research Results:** No URL data was provided or analyzed.\n\n" @@ -300,17 +283,15 @@ def compiling_agent(client, pdf_agent_result, url_agent_result, url_metadata, pr ) # Prepare the API config - config = GenerateContentConfig( + config = GenerationConfig( temperature=TEMPERATURE, max_output_tokens=MAX_OUTPUT_TOKENS, - response_modalities=["TEXT"], ) try: # Make the API call using our wrapper success, response_or_error, is_error = execute_gemini_request( - client=client, - model=MODEL_ID, + model=model, contents=system_prompt, config=config, agent_name="Compiling Agent" @@ -338,7 +319,7 @@ def compiling_agent(client, pdf_agent_result, url_agent_result, url_metadata, pr return f"Error in compiling results: {str(e)}", None # Risk Assessment Agent using Gemini -def risk_assessment_agent(client, risk_category, risk_levels, category_explanations, project_details, pdf_data=None, urls=None): +def risk_assessment_agent(model, risk_category, risk_levels, category_explanations, project_details, pdf_data=None, urls=None): """Agent that assesses a specific risk category based on project details and reference materials""" # Construct the system prompt @@ -383,17 +364,15 @@ def risk_assessment_agent(client, risk_category, risk_levels, category_explanati ) # Prepare the API config - config = GenerateContentConfig( + config = GenerationConfig( temperature=TEMPERATURE, max_output_tokens=MAX_OUTPUT_TOKENS, - response_modalities=["TEXT"], ) try: # Make the API call using our wrapper success, response_or_error, is_error = execute_gemini_request( - client=client, - 
model=MODEL_ID, + model=model, contents=system_prompt, config=config, agent_name=f"Risk Assessment Agent - Investigating: {risk_category}" @@ -472,12 +451,12 @@ def risk_assessment_agent(client, risk_category, risk_levels, category_explanati } # Perform Risk Assessment function -def perform_risk_assessment(client, risk_matrix_df, risk_levels, project_details, pdf_files=None, urls=None): +def perform_risk_assessment(model, risk_matrix_df, risk_levels, project_details, pdf_files=None, urls=None): """ Evaluates all risk categories in the risk matrix and returns a comprehensive assessment. Args: - client: The Gemini API client + model: The Gemini generative model risk_matrix_df: DataFrame containing risk categories and level descriptions risk_levels: List of risk levels from lowest to highest project_details: Dictionary of project information for assessment @@ -515,7 +494,7 @@ def perform_risk_assessment(client, risk_matrix_df, risk_levels, project_details st.info(f"Processing {len(pdf_files)} PDF files. 
This may take a few minutes...") # Get PDF parts for Gemini API - pdf_parts = process_pdfs_with_gemini_file_api(client, pdf_files, risk_matrix_df) + pdf_parts = process_pdfs_with_gemini_file_api(pdf_files, risk_matrix_df) # Prepare PDF data summary for context if pdf_parts: @@ -577,7 +556,7 @@ def perform_risk_assessment(client, risk_matrix_df, risk_levels, project_details try: # Call the risk assessment agent assessment = risk_assessment_agent( - client=client, + model=model, risk_category=category, risk_levels=risk_levels, category_explanations=category_explanations, @@ -608,12 +587,11 @@ def perform_risk_assessment(client, risk_matrix_df, risk_levels, project_details return risk_assessment_results # Function to process PDFs with Gemini -def process_pdfs_with_gemini_file_api(client, pdf_files, risk_matrix_df=None): +def process_pdfs_with_gemini_file_api(pdf_files, risk_matrix_df=None): """ Uploads PDFs directly to Gemini using the File API Args: - client: The Gemini client instance pdf_files: List of PDF file dictionaries from Streamlit uploader risk_matrix_df: Optional parameter for compatibility, not used in this implementation @@ -651,14 +629,11 @@ def process_pdfs_with_gemini_file_api(client, pdf_files, risk_matrix_df=None): file_path = pathlib.Path(tmp_path) try: - # Create the PDF part using the correct method - pdf_part = genai.types.Part.from_bytes( - data=open(tmp_path, 'rb').read(), - mime_type='application/pdf' - ) + # Upload the file + uploaded_file = genai.upload_file(path=tmp_path, display_name=file_name) # Add the part to our list - uploaded_gemini_files.append(pdf_part) + uploaded_gemini_files.append(uploaded_file) except Exception as e: st.error(f"Error processing {file_name}: {str(e)}") diff --git a/back_end/gemini_authentication.py b/back_end/gemini_authentication.py index 7ab9562..044d087 100644 --- a/back_end/gemini_authentication.py +++ b/back_end/gemini_authentication.py @@ -7,8 +7,9 @@ Handles API key validation and client initialization. 
""" -from google import genai +import google.generativeai as genai import streamlit as st +from back_end.gemini_agents import MODEL_ID def initialize_gemini_client(api_key): """ @@ -25,12 +26,11 @@ return None try: - # Create a client with the API key (following test.py pattern) - client = genai.Client(api_key=api_key) - - # Don't try to verify with list() as that doesn't work - # Just return the initialized client - return client + # Configure the client with the API key + genai.configure(api_key=api_key) + # Create a generative model + model = genai.GenerativeModel(MODEL_ID) + return model except Exception as e: st.error(f"Failed to initialize Gemini client: {str(e)}") return None @@ -49,11 +49,10 @@ return False try: - # Create a client with the API key - client = genai.Client(api_key=api_key) - - # We can't use list() to verify since that doesn't exist - # Just return True if we can create the client without error + # Configure the client with the API key + genai.configure(api_key=api_key) + # Create a generative model to test the key + genai.GenerativeModel(MODEL_ID) return True except Exception: - return False \ No newline at end of file + return False diff --git a/back_end/risk_matrix_template.csv b/back_end/risk_matrix_template.csv new file mode 100644 index 0000000..39bec63 --- /dev/null +++ b/back_end/risk_matrix_template.csv @@ -0,0 +1,3 @@ +Risk Type,Risk Description,Low,Low / Medium,Medium,Medium / High,High +{insert name of the risk type},"{insert description of the risk type, 1-3 sentences}",{insert criteria for this risk level},{insert criteria for this risk level},{insert criteria for this risk level},{insert criteria for this risk level},{insert criteria for this risk level} +{repeat above for as many rows as needed},{repeat above for as many rows as needed},{repeat above for as many rows as needed},{repeat above for as many rows as needed},{repeat above for as many rows as 
needed},{repeat above for as many rows as needed},{repeat above for as many rows as needed} diff --git a/back_end/risk_matrix_template.xlsx b/back_end/risk_matrix_template.xlsx deleted file mode 100644 index 896d9a9..0000000 Binary files a/back_end/risk_matrix_template.xlsx and /dev/null differ diff --git a/find_google.py b/find_google.py new file mode 100644 index 0000000..18f5c05 --- /dev/null +++ b/find_google.py @@ -0,0 +1,2 @@ +import google +print(google.__path__) diff --git a/front_end/streamlit_front_end.py b/front_end/streamlit_front_end.py index 11b682c..99ac5ff 100644 --- a/front_end/streamlit_front_end.py +++ b/front_end/streamlit_front_end.py @@ -84,7 +84,7 @@ st.markdown("### Need a template?") # Read the template file - template_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "back_end", "risk_matrix_template.xlsx") + template_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "back_end", "risk_matrix_template.csv") if os.path.exists(template_path): with open(template_path, "rb") as template_file: @@ -93,8 +93,8 @@ st.download_button( label="Download Template", data=template_bytes, - file_name="risk_matrix_template.xlsx", - mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + file_name="risk_matrix_template.csv", + mime="text/csv", help="Download a template to get started" ) @@ -295,9 +295,9 @@ # Step 1: Process the risk matrix - CHANGED TO KEEP AS DATAFRAME if uploaded_file == "default": # Load the default State of Utah Gen-AI Risk Matrix - default_matrix_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "back_end", "ai_risk_justifications.xlsx") + default_matrix_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "back_end", "ai_risk_justifications.csv") if os.path.exists(default_matrix_path): - risk_matrix_df = pd.read_excel(default_matrix_path, engine="openpyxl") + risk_matrix_df = pd.read_csv(default_matrix_path) loading_placeholder.success("Using the default State of 
Utah Gen-AI Risk Matrix") else: loading_placeholder.error("Default matrix file not found. Please contact the administrator.") @@ -307,7 +307,7 @@ file_extension = os.path.splitext(uploaded_file.name)[1] # Use the correct pandas function based on the file extension - if file_extension == ".csv": + if file_extension.lower() == ".csv": risk_matrix_df = pd.read_csv(uploaded_file) else: risk_matrix_df = pd.read_excel(uploaded_file, engine="openpyxl") @@ -442,7 +442,7 @@ # Perform risk assessment st.session_state.current_agent_status = "Performing risk assessment..." risk_results = perform_risk_assessment( - client=st.session_state.gemini_client, + model=st.session_state.gemini_client, risk_matrix_df=risk_matrix_df, risk_levels=risk_levels, project_details=project_details, diff --git a/inspect_genai.py b/inspect_genai.py new file mode 100644 index 0000000..0d16353 --- /dev/null +++ b/inspect_genai.py @@ -0,0 +1,11 @@ +import google.genai +import inspect + +for name, obj in inspect.getmembers(google.genai): + if inspect.ismodule(obj): + print(f"Module: {name}") + for sub_name, sub_obj in inspect.getmembers(obj): + if inspect.isclass(sub_obj): + print(f" Class: {sub_name}") + elif inspect.isclass(obj): + print(f"Class: {name}")