diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..75d7c6a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,56 @@ +name: CI + +on: + push: + branches: ["main", "master", "copilot/**"] + pull_request: + branches: ["main", "master"] + +# Restrict default GITHUB_TOKEN permissions to read-only +permissions: + contents: read + +jobs: + lint-and-test: + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Lint with flake8 + run: | + pip install flake8 + # Stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # Treat all other issues as warnings (non-blocking) + flake8 . --count --exit-zero --max-line-length=100 --statistics + + - name: Run tests + run: pytest test_chatbot.py -v + + docker-build: + runs-on: ubuntu-latest + needs: lint-and-test + permissions: + contents: read + + steps: + - uses: actions/checkout@v4 + + - name: Build Docker image (web mode) + run: docker build --build-arg MODE=web -t ai-chatbot:web . + + - name: Build Docker image (api mode) + run: docker build --build-arg MODE=api -t ai-chatbot:api . 
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..688ab4d --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Python cache +__pycache__/ +*.py[cod] + +# Virtual environment +venv/ +.venv/ + +# Jupyter/IPython +.ipynb_checkpoints/ + +# System files +.DS_Store +Thumbs.db + +# IDE settings +.vscode/ +.idea/ + +# Environment / secrets +.env + +# Pytest cache +.pytest_cache/ + +# Temporary / generated files +converted_dialog.csv diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6e714b6 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,29 @@ +FROM python:3.12-slim + +# Build arguments to select the runtime mode: +# web β†’ run the Streamlit web demo (default) +# api β†’ run the FastAPI REST backend +ARG MODE=web + +WORKDIR /app + +# Install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy source +COPY . . + +# Expose ports +# 8501 β†’ Streamlit +# 8000 β†’ FastAPI / uvicorn +EXPOSE 8501 8000 + +ENV MODE=${MODE} + +CMD ["sh", "-c", "\ + if [ \"$MODE\" = 'api' ]; then \ + uvicorn api:app --host 0.0.0.0 --port 8000; \ + else \ + streamlit run web_demo.py --server.port 8501 --server.address 0.0.0.0; \ + fi"] diff --git a/README.md b/README.md index eee76cc..0a6a802 100644 --- a/README.md +++ b/README.md @@ -1,220 +1,278 @@ -# πŸ€– Simple Python Chatbot with GUI (Tkinter) +# πŸ€– AI ChatBot -## πŸš€ Project Description -Welcome to the **Simple Python Chatbot Project with GUI**! This repository provides an interactive chatbot built with **Python**, using pattern matching on a dialog dataset from Kaggle, and running inside a friendly **Tkinter-based desktop GUI**. 
+![CI](https://github.com/joshuvavinith/AI_ChatBot/actions/workflows/ci.yml/badge.svg) +![Python](https://img.shields.io/badge/python-3.12-blue) +![License](https://img.shields.io/badge/license-MIT-green) -The chatbot uses a simple but effective pattern matching approach to respond to user queries based on a dataset of over 1,700 dialog patterns. It automatically downloads a rich conversation dataset from Kaggle to provide more natural and varied responses. If Kaggle is unavailable, it falls back to a local dataset. It's perfect for beginners, students, or hobby projects. +A modern, extensible Python chatbot with two backends: + +| Mode | When active | What it uses | +|------|-------------|--------------| +| **LLM** | `OPENAI_API_KEY` is set | OpenAI Chat Completions (GPT-3.5 / GPT-4o) | +| **Pattern Matching** | No API key present | Offline CSV dialog dataset | + +Three interfaces are available: a **Tkinter desktop GUI**, a **Streamlit web app**, and a **FastAPI REST backend**. --- ## πŸ“š Table of Contents - [Key Features](#-key-features) -- [Technologies Used](#-technologies-used) -- [Installation Instructions](#-installation-instructions) -- [Kaggle Dataset Setup](#-kaggle-dataset-setup) -- [Usage Example](#-usage-example) -- [Development Process](#-development-process) -- [Contributing Guidelines](#-contributing-guidelines) -- [Evaluation Metrics](#-evaluation-metrics) -- [Future Work](#-future-work) -- [Architecture Diagram](#-architecture-diagram) -- [Interaction with the Chatbot](#-interaction-with-the-chatbot) -- [Additional Information](#-additional-information) -- [Connect with Us](#-connect-with-us) +- [Quick Start](#-quick-start) +- [Installation](#-installation) +- [Configuration](#-configuration) +- [Running the Chatbot](#-running-the-chatbot) + - [Desktop GUI (Tkinter)](#-desktop-gui-tkinter) + - [Web UI (Streamlit)](#-web-ui-streamlit) + - [REST API (FastAPI)](#-rest-api-fastapi) +- [Docker](#-docker) +- [API Reference](#-api-reference) +- 
[Testing](#-testing) +- [CI/CD](#-cicd) +- [Architecture](#-architecture) +- [Contributing](#-contributing) --- ## ✨ Key Features -- πŸ” **Pattern matching chatbot** using simple but effective techniques -- πŸ’¬ **Interactive GUI** built with Tkinter -- πŸ“₯ **Kaggle dataset integration** with automatic download -- πŸ“ **Fallback to local dialog dataset** if Kaggle is unavailable -- πŸ” Supports exact and partial matching for better responses -- ⚑ Lightweight with minimal external dependencies -- 🧩 Easily extensible by adding more dialog patterns +- πŸ€– **LLM backend** β€” connects to OpenAI's API for intelligent, context-aware responses +- πŸ“‹ **Offline fallback** β€” pattern matching on a dialog dataset; works without internet/API key +- 🌐 **Streamlit web UI** β€” chat from any browser with streaming token output (LLM mode) +- πŸ”Œ **FastAPI REST API** β€” `/chat` and `/train` endpoints; per-session conversation memory +- πŸ–₯️ **Tkinter desktop GUI** β€” original GUI updated to show backend mode +- 🧠 **Conversation memory** β€” recent exchanges are passed to the LLM for follow-up questions +- 🐳 **Docker support** β€” single image supports both web and API modes via `MODE` build arg +- βœ… **Tests** β€” pytest suite covering core logic and API endpoints +- πŸ”„ **CI/CD** β€” GitHub Actions workflow: lint β†’ test β†’ Docker build --- -## πŸ› οΈ Technologies Used +## πŸš€ Quick Start + +```bash +git clone https://github.com/joshuvavinith/AI_ChatBot.git +cd AI_ChatBot +pip install -r requirements.txt -- **Python 3.x** – Works with any modern Python version -- **Tkinter** – Built-in GUI framework -- **KaggleHub** – For downloading Kaggle datasets -- **CSV** – For reading dialog data -- **Random** – For selecting varied responses -- **Git** – For version control +# (optional) enable LLM mode +echo "OPENAI_API_KEY=sk-..." > .env -> βœ… This implementation uses minimal external dependencies, with KaggleHub being the only non-standard library required. 
The core functionality works even without internet access by falling back to local data. +# Start the web UI +streamlit run web_demo.py +``` --- -## πŸ”§ Installation Instructions +## πŸ”§ Installation -### 1. Clone the Repository +### 1. Clone & set up environment ```bash git clone https://github.com/joshuvavinith/AI_ChatBot.git cd AI_ChatBot +python -m venv venv +source venv/bin/activate # Windows: venv\Scripts\activate +pip install -r requirements.txt ``` -### 2. Create a Virtual Environment (Recommended) +### 2. (Optional) Configure API key -```bash -python -m venv venv -source venv/bin/activate # On Windows: venv\Scripts\activate +Create a `.env` file in the project root: + +```dotenv +OPENAI_API_KEY=sk-your-key-here ``` -### 3. Install Dependencies +Or export it as an environment variable: ```bash -pip install -r requirements.txt +export OPENAI_API_KEY=sk-your-key-here ``` +Without an API key the bot automatically falls back to offline pattern matching. + --- -## πŸ”‘ Kaggle Dataset Setup +## βš™οΈ Configuration -This project uses the [Simple Dialogs for Chatbot](https://www.kaggle.com/datasets/grafstor/simple-dialogs-for-chatbot) dataset via **KaggleHub**. The dataset contains over 1,800 conversation pairs that significantly enhance the chatbot's response capabilities. +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENAI_API_KEY` | *(unset)* | Enables LLM mode when present | -### Dataset Features: +### Kaggle Dataset (optional) -- 1,800+ question-answer pairs -- Covers a wide range of casual conversation topics -- Automatically downloaded and processed at runtime -- Converted from TXT to CSV format for compatibility +The pattern-matching bot can use a richer Kaggle dialog dataset. +To enable it, place `kaggle.json` in `~/.kaggle/` (or set `KAGGLE_USERNAME` / `KAGGLE_KEY`). +If unavailable, the bot falls back to `dialog.csv`. -### To enable Kaggle downloads: +--- + +## πŸ’¬ Running the Chatbot -1. 
Go to [kaggle.com/account](https://www.kaggle.com/account) and create an API token. -2. Download the `kaggle.json` file. -3. Place it in: +### πŸ–₯️ Desktop GUI (Tkinter) - - Linux/macOS: `~/.kaggle/kaggle.json` - - Windows: `C:\Users\\.kaggle\kaggle.json` +```bash +python ai_chatbot.py +``` -Alternatively, set environment variables: +### 🌐 Web UI (Streamlit) ```bash -export KAGGLE_USERNAME=your_username -export KAGGLE_KEY=your_key +streamlit run web_demo.py ``` -> βœ… If the dataset can't be downloaded, the chatbot will automatically fall back to the local dialog.csv file with basic conversation patterns. +Open your browser at `http://localhost:8501`. + +Features: +- Full conversation history +- Streaming token output in LLM mode (looks like ChatGPT) +- "Clear conversation" button in the sidebar + +### πŸ”Œ REST API (FastAPI) + +```bash +uvicorn api:app --reload +``` + +Interactive docs available at `http://localhost:8000/docs`. --- -## πŸ’¬ Usage Example +## 🐳 Docker -### ▢️ To Run the GUI Chatbot: +### Build ```bash -python ai_chatbot.py +# Web UI (default) +docker build -t ai-chatbot:web . + +# API mode +docker build --build-arg MODE=api -t ai-chatbot:api . ``` -### πŸ–₯️ GUI Features: +### Run -- Type your message in the input box -- Hit **Enter** or click **Send** -- The chatbot responds immediately -- Say `"bye"` or `"exit"` to end the chat +```bash +# Web UI β€” visit http://localhost:8501 +docker run -p 8501:8501 -e OPENAI_API_KEY=sk-... ai-chatbot:web + +# REST API β€” visit http://localhost:8000/docs +docker run -p 8000:8000 -e OPENAI_API_KEY=sk-... ai-chatbot:api +``` --- -## 🧠 Development Process +## πŸ“‘ API Reference -1. **Dataset Retrieval**: Uses `kagglehub` to fetch dialog data from Kaggle -2. **Training**: Trains ChatterBot using NLTK preprocessed dialogs -3. **Interface**: Built with Tkinter for easy interaction -4. **Fallback**: Uses hardcoded sample training data if download fails +### `POST /chat` ---- +Send a message and get a reply. 
Omit `session_id` to start a new session. -## 🀝 Contributing Guidelines +```json +// Request +{ "message": "Hello!", "session_id": "optional-uuid" } -We welcome contributions! πŸ™Œ +// Response +{ + "reply": "Hi there! How can I help you?", + "session_id": "550e8400-e29b-41d4-a716-446655440000", + "mode": "pattern" +} +``` -### How to Contribute: +### `DELETE /sessions/{session_id}` -1. **Fork this repository** -2. Create a branch: +Reset (delete) a conversation session. - ```bash - git checkout -b feature-branch - ``` +### `POST /train` -3. Make your changes and commit: +Reload pattern-matching data from a CSV file on the server. - ```bash - git commit -m "Add new feature" - ``` +```json +// Request +{ "dialog_file": "/path/to/dialog.csv" } -4. Push and create a PR: +// Response +{ "status": "retrained", "patterns_loaded": 42 } +``` - ```bash - git push origin feature-branch - ``` +### `GET /health` -> πŸ’‘ Follow Python best practices and test before submitting. +```json +{ "status": "ok" } +``` --- -## πŸ“ˆ Evaluation Metrics +## πŸ§ͺ Testing -- **BLEU Score** – Quality of generated response -- **Accuracy** – Expected vs actual answers -- **Responsiveness** – Time between input and output -- **User Feedback** – Manual quality testing +```bash +pytest test_chatbot.py -v +``` + +The test suite covers: +- `SimpleBot` β€” training, exact/partial matching, defaults, missing file +- `ChatBot` β€” offline mode, history management, streaming, history cap, retraining +- FastAPI β€” all endpoints (health, chat, delete session, train) --- -## 🌱 Future Work +## πŸ”„ CI/CD + +GitHub Actions runs on every push and pull request to `main`: -- 🌐 Add API/web support for Flask or FastAPI -- 🧠 Switch to GPT/BERT for smarter conversations -- πŸ—£οΈ Voice integration with `speech_recognition` -- πŸ’Ύ Save and reload previous conversation history -- πŸ–₯️ Package as a desktop app using `pyinstaller` +1. **Lint** β€” `flake8` for syntax errors and undefined names +2. 
**Test** β€” `pytest` full suite +3. **Docker build** β€” both `web` and `api` images --- -## πŸ“Š Architecture Diagram +## πŸ“ Architecture ``` -+-------------+ +----------------------+ +-------------+ -| User Input +------->+ ChatBot Engine +------->+ Bot Reply | -+------+------+ +----------------------+ +-------------+ - | - v - [ Tkinter GUI ] - | - v -[ Kaggle Dataset Trainer ] ++------------------+ +------------------+ +------------------+ +| Streamlit Web | | FastAPI REST | | Tkinter Desktop | +| (web_demo.py) | | (api.py) | | (ai_chatbot.py) | ++--------+---------+ +--------+---------+ +--------+---------+ + | | | + +------------------------+-------------------------+ + | + +--------v---------+ + | ChatBot | ← ai_chatbot.py + | (facade) | + +--+----------+----+ + | | + +------------+ +------------+ + | | + +--------v---------+ +-----------v------+ + | LLMBot | | SimpleBot | + | (OpenAI API) | | (CSV patterns) | + +------------------+ +------------------+ ``` --- -## πŸ’¬ Interaction with the Chatbot +## 🀝 Contributing -The chatbot can be integrated or extended with: +1. Fork this repository +2. Create a feature branch: `git checkout -b feature/my-feature` +3. Make your changes and run `pytest test_chatbot.py -v` +4. Commit and push: `git push origin feature/my-feature` +5. Open a pull request -- πŸ“š Custom datasets (CSV/TXT) -- ☁️ Cloud API support -- πŸ”Š Voice UI -- πŸ’‘ Smart context-based conversations +Please follow PEP 8 and include tests for any new logic. --- -## πŸ“± Additional Information +## πŸ“„ License -- **Live Demo**: Coming soon! 
-- **License**: [MIT License](./LICENSE) +[MIT License](./LICENSE) --- -## πŸ”— Connect with Us +## πŸ”— Connect -- πŸ“§ Email: [joshuvavinith.g@care.ac.in](mailto:joshuvavinith.g@care.ac.in) -- πŸ™ GitHub: [@joshuvavinith](https://github.com/joshuvavinith) +- πŸ“§ [joshuvavinith.g@care.ac.in](mailto:joshuvavinith.g@care.ac.in) +- πŸ™ [@joshuvavinith](https://github.com/joshuvavinith) diff --git a/ai_chatbot.py b/ai_chatbot.py index cd0becc..baf17bd 100644 --- a/ai_chatbot.py +++ b/ai_chatbot.py @@ -1,167 +1,347 @@ -import os -import tkinter as tk -from tkinter import scrolledtext +""" +AI ChatBot β€” core module. + +Supports two backends, selected automatically: + 1. LLM backend – uses the OpenAI Chat Completions API when an + OPENAI_API_KEY environment variable (or .env file) is present. + 2. Pattern-matching backend – offline fallback using a CSV dialog dataset. + +The public surface area is intentionally small so that web_demo.py and +api.py can both import from this module without pulling in Tkinter. +""" + import csv +import os import random -import kagglehub +from typing import Optional + +# Load .env variables if python-dotenv is available +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +# --------------------------------------------------------------------------- +# Pattern-matching backend +# --------------------------------------------------------------------------- -# Simple chatbot class using pattern matching class SimpleBot: - def __init__(self): - self.responses = {} - self.default_responses = [ + """Offline chatbot backed by a CSV dialog dataset.""" + + def __init__(self) -> None: + self.responses: dict[str, list[str]] = {} + self.default_responses: list[str] = [ "I'm not sure I understand. Could you rephrase that?", "Interesting question! I'm still learning.", "I don't have an answer for that yet.", - "Could you tell me more about that?" 
+ "Could you tell me more about that?", ] - - def train(self, dialog_file): + + def train(self, dialog_file: str) -> None: try: with open(dialog_file, encoding="utf-8") as file: reader = csv.reader(file) - next(reader) # Skip header row - current_dialog = None - question = None - + next(reader) # skip header + question: Optional[str] = None + for row in reader: - if len(row) >= 3: # dialog_id, line_id, text - dialog_id = row[0] + if len(row) >= 3: line_id = row[1] text = row[2] - - if line_id == '1': # This is a question/prompt + + if line_id == "1": question = text.lower() - elif line_id == '2' and question: # This is a response - if question not in self.responses: - self.responses[question] = [] - self.responses[question].append(text) + elif line_id == "2" and question: + self.responses.setdefault(question, []).append(text) question = None + print(f"Trained with {len(self.responses)} dialog patterns") - except Exception as e: - print(f"Error loading training data: {e}") - - def get_response(self, message): - message = message.lower() - - # Check for exact matches - if message in self.responses: - return random.choice(self.responses[message]) - - # Check for partial matches - for pattern, responses in self.responses.items(): - if pattern in message or message in pattern: - return random.choice(responses) - - # Return default response if no match + except Exception as exc: + print(f"Error loading training data: {exc}") + + def get_response(self, message: str, history: Optional[list] = None) -> str: + """Return a pattern-matched reply. 
*history* is accepted but unused.""" + key = message.lower() + + if key in self.responses: + return random.choice(self.responses[key]) + + for pattern, replies in self.responses.items(): + if pattern in key or key in pattern: + return random.choice(replies) + return random.choice(self.default_responses) -# Initialize chatbot -chatbot = SimpleBot() -# Add method to parse Kaggle's dialogs.txt format -def parse_kaggle_dialogs(file_path): +# --------------------------------------------------------------------------- +# LLM backend (OpenAI) +# --------------------------------------------------------------------------- + +class LLMBot: + """Chatbot backed by the OpenAI Chat Completions API.""" + + SYSTEM_PROMPT = ( + "You are a helpful, friendly, and concise AI assistant. " + "Answer clearly and stay on topic." + ) + + def __init__(self, api_key: Optional[str] = None, model: str = "gpt-3.5-turbo") -> None: + from openai import OpenAI # deferred import so SimpleBot works without openai + self.model = model + self.client = OpenAI(api_key=api_key or os.environ.get("OPENAI_API_KEY")) + + def get_response(self, message: str, history: Optional[list] = None) -> str: + """Call the OpenAI API and return the assistant reply. + + *history* is a list of ``{"role": ..., "content": ...}`` dicts + representing the conversation so far (not including the current message). 
+ """ + messages = [{"role": "system", "content": self.SYSTEM_PROMPT}] + if history: + messages.extend(history) + messages.append({"role": "user", "content": message}) + + completion = self.client.chat.completions.create( + model=self.model, + messages=messages, + max_tokens=512, + temperature=0.7, + ) + return completion.choices[0].message.content.strip() + + def stream_response(self, message: str, history: Optional[list] = None): + """Yield response tokens one by one (for streaming UIs).""" + messages = [{"role": "system", "content": self.SYSTEM_PROMPT}] + if history: + messages.extend(history) + messages.append({"role": "user", "content": message}) + + stream = self.client.chat.completions.create( + model=self.model, + messages=messages, + max_tokens=512, + temperature=0.7, + stream=True, + ) + for chunk in stream: + delta = chunk.choices[0].delta.content + if delta: + yield delta + + +# --------------------------------------------------------------------------- +# Unified ChatBot facade +# --------------------------------------------------------------------------- + +class ChatBot: + """High-level chatbot that auto-selects LLM or pattern-matching backend. + + Priority: + 1. Use LLMBot if ``OPENAI_API_KEY`` is set. + 2. Fall back to SimpleBot otherwise. + + Conversation history is maintained internally so callers only need to + pass the current user message. 
+ """ + + MAX_HISTORY = 20 # keep last N turns in context + + def __init__(self, dialog_file: Optional[str] = None, force_offline: bool = False) -> None: + self.history: list[dict] = [] + self._llm: Optional[LLMBot] = None + self._simple: Optional[SimpleBot] = None + + if not force_offline and os.environ.get("OPENAI_API_KEY"): + try: + self._llm = LLMBot() + print("Using LLM backend (OpenAI).") + except Exception as exc: + print(f"LLM init failed ({exc}), falling back to pattern-matching.") + + if self._llm is None: + self._simple = SimpleBot() + self._load_training_data(dialog_file) + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + @property + def mode(self) -> str: + return "llm" if self._llm else "pattern" + + def chat(self, message: str) -> str: + """Process *message* and return a reply; history is updated automatically.""" + if self._llm: + reply = self._llm.get_response(message, self.history) + else: + reply = self._simple.get_response(message, self.history) # type: ignore[union-attr] + + self._update_history(message, reply) + return reply + + def stream_chat(self, message: str): + """Yield response tokens (LLM mode) or yield the full reply at once.""" + if self._llm: + tokens: list[str] = [] + for token in self._llm.stream_response(message, self.history): + tokens.append(token) + yield token + reply = "".join(tokens) + else: + reply = self._simple.get_response(message, self.history) # type: ignore[union-attr] + yield reply + + self._update_history(message, reply) + + @property + def pattern_count(self) -> int: + """Number of dialog patterns loaded (0 in LLM mode).""" + return len(self._simple.responses) if self._simple else 0 + + def reset_history(self) -> None: + self.history.clear() + + def train(self, dialog_file: str) -> None: + """Retrain the pattern-matching bot from a CSV file.""" + if self._simple: + self._simple.train(dialog_file) + + # 
------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + def _update_history(self, user_msg: str, bot_reply: str) -> None: + self.history.append({"role": "user", "content": user_msg}) + self.history.append({"role": "assistant", "content": bot_reply}) + # Trim to avoid unbounded growth + if len(self.history) > self.MAX_HISTORY * 2: + self.history = self.history[-(self.MAX_HISTORY * 2):] + + def _load_training_data(self, dialog_file: Optional[str]) -> None: + if dialog_file and os.path.exists(dialog_file): + self._simple.train(dialog_file) # type: ignore[union-attr] + return + + # Try Kaggle dataset first, fall back to local dialog.csv + try: + import kagglehub + print("Attempting to download chatbot data from Kaggle...") + data_path = kagglehub.dataset_download("grafstor/simple-dialogs-for-chatbot") + + for root_dir, _, files in os.walk(data_path): + for fname in files: + if fname.lower() == "dialogs.txt": + kaggle_file = os.path.join(root_dir, fname) + converted = _parse_kaggle_dialogs(kaggle_file) + if converted: + print("Training chatbot with Kaggle data...") + self._simple.train(converted) # type: ignore[union-attr] + return + raise FileNotFoundError("dialogs.txt not found in Kaggle dataset") + + except Exception as exc: + print(f"Kaggle dataset unavailable ({exc}), using local data.") + + local = os.path.join(os.path.dirname(__file__), "dialog.csv") + if os.path.exists(local): + print("Training chatbot with local dialog data...") + self._simple.train(local) # type: ignore[union-attr] + else: + print("No training data found; bot will use default responses only.") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _parse_kaggle_dialogs(file_path: str) -> Optional[str]: + """Convert Kaggle's dialogs.txt into the CSV format expected by 
SimpleBot.""" try: print(f"Parsing Kaggle dialogs from {file_path}...") - dialog_pairs = [] - current_dialog_id = 0 - - with open(file_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - - # Create a temporary CSV file in the expected format + with open(file_path, "r", encoding="utf-8") as fh: + lines = fh.readlines() + temp_csv_path = os.path.join(os.path.dirname(file_path), "converted_dialog.csv") - with open(temp_csv_path, 'w', encoding='utf-8') as f: - f.write("dialog_id,line_id,text\n") - - for i in range(0, len(lines)-1, 2): - if i+1 < len(lines): - question = lines[i].strip() - answer = lines[i+1].strip() - - if question and answer: - current_dialog_id += 1 - f.write(f"{current_dialog_id},1,{question}\n") - f.write(f"{current_dialog_id},2,{answer}\n") - - print(f"Converted {current_dialog_id} dialog pairs to CSV format") + dialog_id = 0 + with open(temp_csv_path, "w", encoding="utf-8") as fh: + fh.write("dialog_id,line_id,text\n") + for i in range(0, len(lines) - 1, 2): + q, a = lines[i].strip(), lines[i + 1].strip() + if q and a: + dialog_id += 1 + fh.write(f"{dialog_id},1,{q}\n") + fh.write(f"{dialog_id},2,{a}\n") + + print(f"Converted {dialog_id} dialog pairs to CSV format") return temp_csv_path - except Exception as e: - print(f"Error parsing Kaggle dialogs: {e}") + except Exception as exc: + print(f"Error parsing Kaggle dialogs: {exc}") return None -# Try to download the dataset from Kaggle using kagglehub -try: - print("Attempting to download chatbot data from Kaggle...") - data_path = kagglehub.dataset_download("grafstor/simple-dialogs-for-chatbot") - - # Look for dialogs.txt (the actual file in the dataset) - for root, dirs, files in os.walk(data_path): - for file in files: - if file.lower() == "dialogs.txt": - kaggle_dialog_file = os.path.join(root, file) - print(f"Found Kaggle dialog file: {kaggle_dialog_file}") - - # Convert the Kaggle format to our expected CSV format - converted_file = parse_kaggle_dialogs(kaggle_dialog_file) - if 
converted_file: - print("Training chatbot with Kaggle data...") - chatbot.train(converted_file) - break - else: - continue - break - else: - raise FileNotFoundError("Dialog file not found in Kaggle dataset") - -except Exception as e: - print(f"Error with Kaggle dataset: {e}") - print("Falling back to local dialog data...") - - # Fallback to local dialog.csv file - local_dialog_file = "dialog.csv" - try: - print("Training chatbot with local dialog data...") - chatbot.train(local_dialog_file) - except Exception as e: - print(f"Error loading local training data: {e}") -# Create GUI -root = tk.Tk() -root.title("Chat with GUIBot") -root.geometry("500x550") +# --------------------------------------------------------------------------- +# Tkinter GUI (only runs when this file is executed directly) +# --------------------------------------------------------------------------- -chat_log = scrolledtext.ScrolledText(root, wrap=tk.WORD) -chat_log.config(state=tk.DISABLED) -chat_log.pack(padx=10, pady=10, fill=tk.BOTH, expand=True) +def _run_gui() -> None: + import tkinter as tk + from tkinter import scrolledtext -entry_frame = tk.Frame(root) -entry_frame.pack(padx=10, pady=10, fill=tk.X) + bot = ChatBot() -user_input = tk.Entry(entry_frame, font=("Arial", 14)) -user_input.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(0, 10)) + gui_root = tk.Tk() + gui_root.title("Chat with AI Bot") + gui_root.geometry("500x580") + # ── mode badge ────────────────────────────────────────────────────────── + mode_label = tk.Label( + gui_root, + text=f"Mode: {'πŸ€– LLM (OpenAI)' if bot.mode == 'llm' else 'πŸ“‹ Pattern Matching'}", + font=("Arial", 10), + fg="#555", + ) + mode_label.pack(anchor="w", padx=12, pady=(6, 0)) -def send_message(): - message = user_input.get() - if message.strip(): + # ── chat log ──────────────────────────────────────────────────────────── + chat_log = scrolledtext.ScrolledText(gui_root, wrap=tk.WORD) + chat_log.config(state=tk.DISABLED) + chat_log.pack(padx=10, 
pady=6, fill=tk.BOTH, expand=True)
+
+    # ── input row ───────────────────────────────────────────────────────────
+    entry_frame = tk.Frame(gui_root)
+    entry_frame.pack(padx=10, pady=10, fill=tk.X)
+
+    user_input = tk.Entry(entry_frame, font=("Arial", 14))
+    user_input.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(0, 10))
+
+    def send_message() -> None:
+        message = user_input.get().strip()
+        if not message:
+            return
         chat_log.config(state=tk.NORMAL)
-        chat_log.insert(tk.END, "You: " + message + "\n")
-        response = chatbot.get_response(message)
-        chat_log.insert(tk.END, "Bot: " + str(response) + "\n\n")
+        chat_log.insert(tk.END, f"You: {message}\n")
+        user_input.delete(0, tk.END)
+
+        response = bot.chat(message)
+        chat_log.insert(tk.END, f"Bot: {response}\n\n")
         chat_log.config(state=tk.DISABLED)
         chat_log.yview(tk.END)
-        user_input.delete(0, tk.END)
 
-send_btn = tk.Button(entry_frame, text="Send", command=send_message)
-send_btn.pack(side=tk.RIGHT)
+    send_btn = tk.Button(entry_frame, text="Send", command=send_message)
+    send_btn.pack(side=tk.RIGHT)
+
+    user_input.bind("<Return>", lambda _: send_message())
+
+    # ── welcome message ─────────────────────────────────────────────────────
+    chat_log.config(state=tk.NORMAL)
+    chat_log.insert(tk.END, "Bot: Hello! I'm your AI chatbot. How can I help you today?\n\n")
+    chat_log.config(state=tk.DISABLED)
 
-user_input.bind("<Return>", lambda event=None: send_message())
+    print("Starting GUI chatbot...")
+    gui_root.mainloop()
 
-# Add a welcome message
-chat_log.config(state=tk.NORMAL)
-chat_log.insert(tk.END, "Bot: Hello! I'm your simple chatbot. How can I help you today?\n\n")
-chat_log.config(state=tk.DISABLED)
-print("Starting GUI chatbot...")
-root.mainloop()
+
+if __name__ == "__main__":
+    _run_gui()
diff --git a/api.py b/api.py
new file mode 100644
index 0000000..8eabdb5
--- /dev/null
+++ b/api.py
@@ -0,0 +1,149 @@
+"""
+api.py β€” FastAPI REST backend for AI ChatBot.
+ +Run with: + uvicorn api:app --reload + +Endpoints: + POST /chat Send a message and receive a reply. + POST /train Reload the pattern-matching bot from a CSV file. + GET /health Health check. + DELETE /sessions/{id} Reset a conversation session. + +Environment variables: + OPENAI_API_KEY Optional – enables the LLM backend. +""" + +import os +import uuid +from typing import Optional + +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel + +# Load .env if available +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +from ai_chatbot import ChatBot + +# --------------------------------------------------------------------------- +# App setup +# --------------------------------------------------------------------------- + +app = FastAPI( + title="AI ChatBot API", + description=( + "REST API for the AI ChatBot. Supports LLM (OpenAI) and offline " + "pattern-matching backends. Maintains per-session conversation history." + ), + version="1.0.0", +) + +# Per-session bots (keyed by session_id string) +_sessions: dict[str, ChatBot] = {} + + +def clear_all_sessions() -> None: + """Remove all active sessions. 
Intended for use in tests."""
+    _sessions.clear()
+
+
+def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]:
+    """Return (session_id, ChatBot) for the given session; create if missing."""
+    # Sessions live in the module-level _sessions dict; no globals to declare.
+
+    if session_id is None:
+        # Create a new session
+        session_id = str(uuid.uuid4())
+
+    if session_id not in _sessions:
+        _sessions[session_id] = ChatBot()
+
+    return session_id, _sessions[session_id]
+
+
+# ---------------------------------------------------------------------------
+# Request / Response schemas
+# ---------------------------------------------------------------------------
+
+class ChatRequest(BaseModel):
+    message: str
+    session_id: Optional[str] = None
+
+
+class ChatResponse(BaseModel):
+    reply: str
+    session_id: str
+    mode: str
+
+
+class TrainRequest(BaseModel):
+    dialog_file: str
+
+
+class TrainResponse(BaseModel):
+    status: str
+    patterns_loaded: int
+
+
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
+
+@app.get("/health")
+def health_check():
+    """Simple health check."""
+    return {"status": "ok"}
+
+
+@app.post("/chat", response_model=ChatResponse)
+def chat(request: ChatRequest):
+    """Send a message and receive a reply.
+
+    If *session_id* is omitted, a new session is created and its ID is
+    returned so the client can continue the conversation.
+ """ + if not request.message.strip(): + raise HTTPException(status_code=400, detail="Message must not be empty.") + + session_id, bot = _get_or_create_session(request.session_id) + reply = bot.chat(request.message) + + return ChatResponse(reply=reply, session_id=session_id, mode=bot.mode) + + +@app.delete("/sessions/{session_id}") +def reset_session(session_id: str): + """Delete a conversation session (clears history).""" + if session_id in _sessions: + del _sessions[session_id] + return {"status": "session deleted", "session_id": session_id} + raise HTTPException(status_code=404, detail="Session not found.") + + +@app.post("/train", response_model=TrainResponse) +def train(request: TrainRequest): + """Reload pattern-matching data from a CSV file on the server. + + The file must exist on the server filesystem. This endpoint only + affects future sessions created after the reload (existing sessions + keep their current bot instance). + """ + if not os.path.exists(request.dialog_file): + raise HTTPException( + status_code=404, + detail=f"File not found: {request.dialog_file}", + ) + + # Retrain a fresh bot and store it as the template for new sessions + fresh_bot = ChatBot(dialog_file=request.dialog_file, force_offline=True) + patterns = fresh_bot.pattern_count + + # Clear all existing sessions so next requests pick up new data + _sessions.clear() + + return TrainResponse(status="retrained", patterns_loaded=patterns) diff --git a/requirements.txt b/requirements.txt index 7c416df..256e933 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,17 @@ -# Dependencies for the chatbot with Kaggle dataset integration -kagglehub \ No newline at end of file +# Core +kagglehub +python-dotenv>=1.0.0 + +# LLM backend (optional β€” enables OpenAI-powered responses) +openai>=1.0.0 + +# Web UI +streamlit>=1.28.0 + +# REST API +fastapi>=0.104.0 +uvicorn[standard]>=0.23.0 + +# Testing +pytest>=7.4.0 +httpx>=0.25.0 \ No newline at end of file diff --git a/test_chatbot.py 
b/test_chatbot.py new file mode 100644 index 0000000..fb814a2 --- /dev/null +++ b/test_chatbot.py @@ -0,0 +1,218 @@ +""" +test_chatbot.py β€” Unit tests for AI ChatBot. + +Run with: + pytest test_chatbot.py -v +""" + +import os +import tempfile +import pytest +from fastapi.testclient import TestClient + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +DIALOG_CSV = """\ +dialog_id,line_id,text +1,1,hello +1,2,Hi there! How can I help? +2,1,what is your name +2,2,I'm a chatbot built with Python! +3,1,bye +3,2,Goodbye! Have a great day. +""" + + +@pytest.fixture +def dialog_file(tmp_path): + """Write a small dialog CSV and return its path.""" + path = tmp_path / "test_dialog.csv" + path.write_text(DIALOG_CSV, encoding="utf-8") + return str(path) + + +# --------------------------------------------------------------------------- +# SimpleBot tests +# --------------------------------------------------------------------------- + +class TestSimpleBot: + def test_train_loads_patterns(self, dialog_file): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train(dialog_file) + assert len(bot.responses) == 3 + + def test_exact_match(self, dialog_file): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train(dialog_file) + assert bot.get_response("hello") == "Hi there! How can I help?" + + def test_partial_match(self, dialog_file): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train(dialog_file) + reply = bot.get_response("hey, hello there") + assert reply == "Hi there! How can I help?" 
+ + def test_default_response_on_unknown(self, dialog_file): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train(dialog_file) + reply = bot.get_response("zxqwerty unknown phrase 12345") + assert isinstance(reply, str) + assert len(reply) > 0 + + def test_train_missing_file(self): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train("/nonexistent/path/dialog.csv") + assert bot.responses == {} + + +# --------------------------------------------------------------------------- +# ChatBot (facade) tests β€” forced offline mode +# --------------------------------------------------------------------------- + +class TestChatBot: + def test_offline_mode(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + assert bot.mode == "pattern" + + def test_chat_returns_string(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + reply = bot.chat("hello") + assert isinstance(reply, str) + assert len(reply) > 0 + + def test_history_grows_with_turns(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + assert bot.history == [] + bot.chat("hello") + assert len(bot.history) == 2 # one user + one assistant + + def test_reset_history(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + bot.chat("hello") + bot.reset_history() + assert bot.history == [] + + def test_stream_chat_yields_text(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + tokens = list(bot.stream_chat("hello")) + assert len(tokens) >= 1 + assert "".join(tokens) == "Hi there! How can I help?" 
+ + def test_history_capped(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + # Drive history past MAX_HISTORY * 2 + for _ in range(bot.MAX_HISTORY + 5): + bot.chat("hello") + assert len(bot.history) <= bot.MAX_HISTORY * 2 + + def test_retrain(self, dialog_file, tmp_path): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + + new_csv = tmp_path / "new_dialog.csv" + new_csv.write_text( + "dialog_id,line_id,text\n1,1,howdy\n1,2,Howdy partner!\n", + encoding="utf-8", + ) + bot.train(str(new_csv)) + assert bot.chat("howdy") == "Howdy partner!" + + +# --------------------------------------------------------------------------- +# FastAPI tests +# --------------------------------------------------------------------------- + +@pytest.fixture +def api_client(dialog_file, monkeypatch): + """Return a TestClient with the FastAPI app; force offline mode.""" + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + + # Reset module-level state between tests + import api as api_module + api_module.clear_all_sessions() + + # Pre-populate a session backed by test data + from ai_chatbot import ChatBot + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + sid = "test-session" + api_module._sessions[sid] = bot + + from fastapi.testclient import TestClient + return TestClient(api_module.app), sid + + +class TestAPI: + def test_health(self, api_client): + client, _ = api_client + resp = client.get("/health") + assert resp.status_code == 200 + assert resp.json()["status"] == "ok" + + def test_chat_creates_session(self, api_client): + client, _ = api_client + resp = client.post("/chat", json={"message": "hello"}) + assert resp.status_code == 200 + data = resp.json() + assert "reply" in data + assert "session_id" in data + assert data["mode"] in ("llm", "pattern") + + def test_chat_with_session(self, api_client): + client, sid = api_client + resp = client.post("/chat", 
json={"message": "hello", "session_id": sid}) + assert resp.status_code == 200 + data = resp.json() + assert data["session_id"] == sid + assert isinstance(data["reply"], str) + + def test_chat_empty_message(self, api_client): + client, _ = api_client + resp = client.post("/chat", json={"message": " "}) + assert resp.status_code == 400 + + def test_delete_session(self, api_client): + client, sid = api_client + resp = client.delete(f"/sessions/{sid}") + assert resp.status_code == 200 + + # Session should be gone now + resp2 = client.delete(f"/sessions/{sid}") + assert resp2.status_code == 404 + + def test_train_valid_file(self, api_client, dialog_file): + client, _ = api_client + resp = client.post("/train", json={"dialog_file": dialog_file}) + assert resp.status_code == 200 + data = resp.json() + assert data["status"] == "retrained" + assert data["patterns_loaded"] > 0 + + def test_train_missing_file(self, api_client): + client, _ = api_client + resp = client.post("/train", json={"dialog_file": "/nonexistent/file.csv"}) + assert resp.status_code == 404 diff --git a/web_demo.py b/web_demo.py new file mode 100644 index 0000000..d141b9f --- /dev/null +++ b/web_demo.py @@ -0,0 +1,121 @@ +""" +web_demo.py β€” Streamlit web interface for AI ChatBot. + +Run with: + streamlit run web_demo.py + +Environment variables: + OPENAI_API_KEY Set this to enable the LLM backend (optional). 
+""" + +import os + +import streamlit as st + +# Load .env if available +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +from ai_chatbot import ChatBot + +# --------------------------------------------------------------------------- +# Page configuration +# --------------------------------------------------------------------------- +st.set_page_config( + page_title="AI ChatBot", + page_icon="πŸ€–", + layout="centered", +) + +st.title("πŸ€– AI ChatBot") +st.caption( + "Powered by **OpenAI GPT** (LLM mode) when `OPENAI_API_KEY` is set, " + "otherwise uses offline **pattern-matching**." +) + +# --------------------------------------------------------------------------- +# Session-level chatbot instance +# --------------------------------------------------------------------------- +@st.cache_resource +def _get_bot_factory() -> ChatBot: + """Return a template bot (loads data once); actual per-session bots copy from this.""" + return ChatBot() + + +# Per-session bot stored in session_state so each browser tab/user gets its own history +if "bot" not in st.session_state: + st.session_state.bot = _get_bot_factory() + +bot: ChatBot = st.session_state.bot + +# Initialise message history in session state +if "messages" not in st.session_state: + st.session_state.messages: list[dict] = [] + st.session_state.messages.append( + { + "role": "assistant", + "content": "Hello! I'm your AI chatbot. How can I help you today?", + } + ) + +# --------------------------------------------------------------------------- +# Sidebar +# --------------------------------------------------------------------------- +with st.sidebar: + st.header("βš™οΈ Settings") + st.info(f"**Backend:** {'πŸ€– LLM (OpenAI)' if bot.mode == 'llm' else 'πŸ“‹ Pattern Matching'}") + + if st.button("πŸ—‘οΈ Clear conversation"): + st.session_state.messages = [ + { + "role": "assistant", + "content": "Conversation cleared. 
How can I help you?", + } + ] + bot.reset_history() + st.rerun() + + st.markdown("---") + st.markdown("### About") + st.markdown( + "This chatbot supports two backends:\n" + "- **LLM mode**: uses OpenAI's API for intelligent, context-aware replies.\n" + "- **Pattern mode**: offline fallback using a dialog dataset.\n\n" + "Set `OPENAI_API_KEY` in your environment or a `.env` file to enable LLM mode." + ) + +# --------------------------------------------------------------------------- +# Chat history display +# --------------------------------------------------------------------------- +for msg in st.session_state.messages: + with st.chat_message(msg["role"]): + st.markdown(msg["content"]) + +# --------------------------------------------------------------------------- +# Chat input +# --------------------------------------------------------------------------- +if prompt := st.chat_input("Type a message…"): + # Show user message + st.session_state.messages.append({"role": "user", "content": prompt}) + with st.chat_message("user"): + st.markdown(prompt) + + # Generate and stream the assistant reply + with st.chat_message("assistant"): + if bot.mode == "llm": + # Streaming response + response_placeholder = st.empty() + full_response = "" + for token in bot.stream_chat(prompt): + full_response += token + response_placeholder.markdown(full_response + "β–Œ") + response_placeholder.markdown(full_response) + reply = full_response + else: + reply = bot.chat(prompt) + st.markdown(reply) + + st.session_state.messages.append({"role": "assistant", "content": reply})