Skip to content
Merged

Ag2 #20

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ africastalking==1.2.8
black==24.8.0
pylint==3.2.6
ipython==8.30.0
ollama==0.4.4
ollama==0.5.1
black==24.8.0
pyment==0.3.3
codecarbon==2.7.1
Expand All @@ -16,11 +16,12 @@ nltk==3.9.1
soundfile==0.12.1
groq==0.13.1
numpy==2.2.1
pyautogen==0.2.18
ag2==0.9.6
flaml[automl]
edge-tts==7.0.0
deprecated==1.2.18
pydantic==2.9.2
flask==3.0.0
flask-cors==6.0.0
requests==2.32.4
ag2[ollama]
83 changes: 70 additions & 13 deletions utils/function_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,52 @@
from logging.handlers import RotatingFileHandler
from importlib.metadata import version
import asyncio
import africastalking
import re
import warnings
from typing import Optional, Union

import ollama
import requests
from autogen import ConversableAgent

# Suppress Pydantic UserWarning from autogen
warnings.filterwarnings(
"ignore",
category=UserWarning,
message=r".*Field.*in.*has conflict with protected namespace.*",
)


# Monkey-patch for pydantic issue with autogen
# See: https://github.com/microsoft/autogen/issues/1996
try:
from pydantic._internal import _typing_extra
except ImportError:
pass # not a pydantic v2.7.0+ installation, no issue
else:
try:
# pydantic v2.7.0+
from pydantic._internal._typing_extra import try_eval_type
except ImportError:
# autogen is not yet compatible with pydantic v2.7.0+
# see: https://github.com/microsoft/autogen/issues/1996
# monkey-patch pydantic
from typing import Any, Dict, Type

def try_eval_type(t: Type[Any]) -> Type[Any]:
try:
return _typing_extra._eval_type(
t, globalns=None, localns=None, type_aliases=None
)
except (NameError, TypeError):
return t

_typing_extra.try_eval_type = try_eval_type


from autogen.agentchat.conversable_agent import ConversableAgent
from pydantic import BaseModel, field_validator, ValidationError
from typing import Union
from typing import Optional
import re
from .communication_apis import send_mobile_data_wrapper, send_mobile_data_original

from .communication_apis import send_mobile_data_wrapper

# from codecarbon import EmissionsTracker # Import the EmissionsTracker
from duckduckgo_search import DDGS
Expand Down Expand Up @@ -454,7 +491,7 @@ def send_message(phone_number: str, message: str, username: str, **kwargs) -> st

try:
# Import communication_apis relative to this package (it sits alongside this module in utils/)
from communication_apis import send_message as comm_send_message
from .communication_apis import send_message as comm_send_message

masked_number = mask_phone_number(phone_number)
logger.info("Delegating message sending to %s", masked_number)
Expand Down Expand Up @@ -512,7 +549,10 @@ def send_ussd(phone_number: str, code: str, **kwargs) -> str:


def get_wallet_balance(**kwargs) -> str:
"""Fetch the current wallet balance from Africa's Talking account using the documented API endpoint."""
"""
Fetch the current wallet balance from Africa's Talking account using the
documented API endpoint.
"""
try:
username = os.getenv("AT_USERNAME")
api_key = os.getenv("AT_API_KEY")
Expand Down Expand Up @@ -842,7 +882,7 @@ def search_news(query: str, max_results: int = 5, **kwargs) -> str:

Returns
-------
str : The search results.
str : The search results, formatted for readability.

Examples
--------
Expand All @@ -860,8 +900,24 @@ def search_news(query: str, max_results: int = 5, **kwargs) -> str:
max_results=max_results,
**kwargs,
)
logger.debug("The search results are: %s", results)
return json.dumps(results)
logger.debug("The raw search results are: %s", results)

if not results:
return "No news found for your query."

formatted_results = []
for article in results:
title = article.get("title", "No Title")
source = article.get("source", "No Source")
body = article.get("body", "No Summary")
url = article.get("url", "No URL")

formatted_article = (
f"Title: {title}\n" f"Source: {source}\n" f"Summary: {body}\n" f"URL: {url}"
)
formatted_results.append(formatted_article)

return "\n\n---\n\n".join(formatted_results)


def translate_text(text: str, target_language: str) -> str:
Expand Down Expand Up @@ -938,7 +994,8 @@ def translate_text(text: str, target_language: str) -> str:

message = f"Zoe, translate '{text}' to {normalized_language}"
result = joe.initiate_chat(zoe, message=message, max_turns=2)
return result
# Return the chat summary (the last message under the default summary method), which holds the translation
return result.summary


# Asynchronous function to handle the conversation with the model
Expand Down Expand Up @@ -1426,7 +1483,7 @@ async def run(model: str, user_input: str):
if not user_prompt:
logger.info("No input provided. Exiting...")
break
elif user_prompt.lower() == "exit":
if user_prompt.lower() == "exit":
break

# Run the asynchronous function with tracker
Expand Down
99 changes: 57 additions & 42 deletions voice_stt_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
from logging.handlers import RotatingFileHandler
import asyncio
from importlib.metadata import version, PackageNotFoundError
import tempfile
import warnings
from typing import Optional

# Third-Party Library Imports
import gradio as gr
Expand All @@ -50,8 +51,6 @@

# Local Module Imports
from utils.function_call import send_airtime, send_message, search_news, translate_text
from typing import Optional
from utils.models import ReceiptData, LineItem
from utils.constants import VISION_SYSTEM_PROMPT, API_SYSTEM_PROMPT

# ------------------------------------------------------------------------------------
Expand All @@ -62,6 +61,13 @@
langtrace.init(api_key=os.getenv("LANGTRACE_API_KEY"))
groq_client = groq.Client(api_key=os.getenv("GROQ_API_KEY"))

# Suppress Pydantic UserWarning from autogen
warnings.filterwarnings(
"ignore",
category=UserWarning,
message=r".*Field.*in.*has conflict with protected namespace.*",
)

# Set up the logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG) # Set the logger to handle all levels DEBUG and above
Expand Down Expand Up @@ -131,12 +137,20 @@


async def text_to_speech(text: str) -> None:
"""
Generate speech from text using edge-tts.

Parameters
----------
text : str
The text to convert to speech.
"""
try:
communicate = edge_tts.Communicate(text, VOICE)
await communicate.save(OUTPUT_FILE)
logger.info(f"Generated speech output: {OUTPUT_FILE}")
logger.info("Generated speech output: %s", OUTPUT_FILE)
except Exception as e:
logger.error(f"TTS Error: {str(e)}")
logger.error("TTS Error: %s", str(e))
raise


Expand Down Expand Up @@ -248,7 +262,7 @@ async def text_to_speech(text: str) -> None:
@with_langtrace_root_span()
async def process_user_message(
message: str,
history: list,
history: list, # pylint: disable=unused-argument
use_vision: bool = False,
image_path: Optional[str] = None,
) -> str:
Expand Down Expand Up @@ -286,15 +300,18 @@ async def process_user_message(
messages.append({"role": "user", "content": message})

try:
model_name = "llama3.2-vision" if use_vision else "qwen2.5:0.5b"
# Use 'llava' as it's a common Ollama vision model.
# Ensure you have pulled the model with `ollama pull llava`.
# You can use llama3.2-vision as well
model_name = "llava" if use_vision else "qwen2.5:0.5b"
response = await client.chat(
model=model_name,
messages=messages,
tools=None if use_vision else tools,
format="json" if use_vision else None,
options={"temperature": 0},
)
except Exception as e:
except Exception:
logger.exception("Failed to get response from Ollama client.")
return "An unexpected error occurred while communicating with the assistant."

Expand Down Expand Up @@ -353,16 +370,11 @@ async def process_user_message(
}
)

return f"Function `{tool_name}` executed successfully. Response:\n{function_response}"
except (
send_airtime.ErrorType,
send_message.ErrorType,
search_news.ErrorType,
translate_text.ErrorType,
) as e:
logger.error("Handled error in tool `%s`: %s", tool_name, e)
return f"Error executing `{tool_name}`: {str(e)}"
except Exception as e: # pylint: disable=broad-exception-caught
return (
f"Function `{tool_name}` executed successfully. Response:\n"
f"{function_response}"
)
except Exception as e:
logger.exception("Unexpected error in tool `%s`: %s", tool_name, e)
return f"An unexpected error occurred while executing `{tool_name}`."
else:
Expand Down Expand Up @@ -420,12 +432,12 @@ async def process_audio_and_llm(audio):
response = await process_user_message(transcription, [])
return f"Transcription: {transcription}\nLLM Response: {response}"

except Exception as e:
logger.exception("Error during transcription or LLM processing: %s", e)
return f"Error: {str(e)}"
except Exception as e:
logger.exception("Error in audio processing: %s", e)
return f"Error: {str(e)}"
except Exception as exc:
logger.exception("Error during transcription or LLM processing: %s", exc)
return f"Error: {str(exc)}"
except Exception as exc:
logger.exception("Error in audio processing: %s", exc)
return f"Error: {str(exc)}"


def gradio_interface(message: str, history: list) -> str:
Expand All @@ -447,8 +459,8 @@ def gradio_interface(message: str, history: list) -> str:
try:
response = asyncio.run(process_user_message(message, history))
return response
except Exception as e: # pylint: disable=broad-exception-caught
logger.exception("Error in gradio_interface: %s", e)
except Exception as exc:
logger.exception("Error in gradio_interface: %s", exc)
return "An unexpected error occurred while processing your message."


Expand Down Expand Up @@ -540,9 +552,9 @@ def show_transcription(audio):
)
logger.info("Audio transcribed successfully: %s", transcription)
return transcription
except Exception as e:
logger.exception("Error during transcription: %s", e)
return f"Error: {str(e)}"
except Exception as exc:
logger.exception("Error during transcription: %s", exc)
return f"Error: {str(exc)}"

# Define TTS Function
async def generate_tts(text: str) -> str:
Expand All @@ -552,28 +564,28 @@ async def generate_tts(text: str) -> str:
try:
communicate = edge_tts.Communicate(text, VOICE)
await communicate.save(OUTPUT_FILE)
logger.info(f"TTS audio generated successfully: {OUTPUT_FILE}")
logger.info("TTS audio generated successfully: %s", OUTPUT_FILE)
return OUTPUT_FILE
except Exception as e:
logger.error(f"TTS Generation Error: {str(e)}")
except Exception as exc:
logger.error("TTS Generation Error: %s", str(exc))
return None

# Wire up the components
transcribe_button.click(
transcribe_button.click( # pylint: disable=no-member
fn=show_transcription, inputs=audio_input, outputs=transcription_preview
)

# Process the edited text
process_button.click(
process_button.click( # pylint: disable=no-member
fn=lambda x: asyncio.run(process_user_message(x, [])),
inputs=transcription_preview,
outputs=audio_output,
)

# Connect TTS Button to Function
tts_button.click(
tts_button.click( # pylint: disable=no-member
fn=lambda txt: asyncio.run(generate_tts(txt)),
inputs=audio_output, # Replace with the component holding the final text
inputs=audio_output,
outputs=tts_audio,
)

Expand All @@ -596,17 +608,20 @@ async def generate_tts(text: str) -> str:
result_text = gr.Textbox(label="Analysis Result")

async def process_with_speech(image):
"""
Process image with vision model and return analysis.
"""
try:
# Get text result first
text_result = await process_user_message(
"Analyze this receipt", [], use_vision=True, image_path=image
)
return text_result
except Exception as e:
logger.error(f"Processing error: {str(e)}")
return str(e)
except Exception as exc:
logger.error("Processing error: %s", str(exc))
return str(exc)

scan_button.click(
scan_button.click( # pylint: disable=no-member
fn=lambda img: asyncio.run(process_with_speech(img)),
inputs=image_input,
outputs=result_text,
Expand All @@ -621,6 +636,6 @@ async def process_with_speech(image):
logger.info("Launching Gradio interface...")
demo.launch(inbrowser=True, server_name="0.0.0.0", server_port=7860)
logger.info("Gradio interface launched successfully.")
except Exception as e:
logger.exception("Failed to launch Gradio interface: %s", e)
except Exception as exc:
logger.exception("Failed to launch Gradio interface: %s", exc)
logger.info("Script execution completed")