Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 93 additions & 82 deletions monitoring/conversation_diversity_coverage_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,48 +141,55 @@ def _analyze_vocabulary_diversity(
all_words = set()
word_frequencies = Counter()

for _, conv in conversations.iterrows():
text = conv["conversation_text"].lower()
words = re.findall(r"\b[a-zA-Z]+\b", text)
# ⚡ Bolt: Replace .iterrows() with vectorized access and zip to significantly reduce iteration overhead
for text, dataset, tier in zip(
conversations["conversation_text"],
conversations["dataset"],
conversations["tier"],
):
text_lower = text.lower()
words = re.findall(r"\b[a-zA-Z]+\b", text_lower)

# Update global vocabulary
all_words.update(words)
word_frequencies.update(words)

# Update dataset vocabularies
dataset_vocabularies[conv["dataset"]].update(words)
tier_vocabularies[conv["tier"]].update(words)
dataset_vocabularies[dataset].update(words)
tier_vocabularies[tier].update(words)

# Calculate vocabulary diversity metrics
vocabulary_stats = {
"total_unique_words": len(all_words),
"total_word_instances": sum(word_frequencies.values()),
"vocabulary_richness": len(all_words) / sum(word_frequencies.values())
if sum(word_frequencies.values()) > 0
else 0,
"vocabulary_richness": (
len(all_words) / sum(word_frequencies.values())
if sum(word_frequencies.values()) > 0
else 0
),
"most_common_words": word_frequencies.most_common(20),
"rare_words_count": sum(
1 for count in word_frequencies.values() if count == 1
),
"rare_words_percentage": sum(
1 for count in word_frequencies.values() if count == 1
)
/ len(all_words)
* 100
if len(all_words) > 0
else 0,
"rare_words_percentage": (
sum(1 for count in word_frequencies.values() if count == 1)
/ len(all_words)
* 100
if len(all_words) > 0
else 0
),
}

# Dataset vocabulary diversity
dataset_vocab_stats = {}
for dataset, vocab in dataset_vocabularies.items():
dataset_vocab_stats[dataset] = {
"unique_words": len(vocab),
"vocabulary_overlap_with_global": len(vocab.intersection(all_words))
/ len(all_words)
* 100
if len(all_words) > 0
else 0,
"vocabulary_overlap_with_global": (
len(vocab.intersection(all_words)) / len(all_words) * 100
if len(all_words) > 0
else 0
),
"unique_to_dataset": len(vocab - (all_words - vocab)),
"conversation_count": len(
conversations[conversations["dataset"] == dataset]
Expand All @@ -194,11 +201,11 @@ def _analyze_vocabulary_diversity(
for tier, vocab in tier_vocabularies.items():
tier_vocab_stats[tier] = {
"unique_words": len(vocab),
"vocabulary_overlap_with_global": len(vocab.intersection(all_words))
/ len(all_words)
* 100
if len(all_words) > 0
else 0,
"vocabulary_overlap_with_global": (
len(vocab.intersection(all_words)) / len(all_words) * 100
if len(all_words) > 0
else 0
),
"conversation_count": len(conversations[conversations["tier"] == tier]),
}

Expand Down Expand Up @@ -276,14 +283,14 @@ def _analyze_topic_diversity(self, conversations: pd.DataFrame) -> Dict[str, Any
topic_diversity = {
"cluster_count": n_clusters,
"cluster_analysis": cluster_analysis,
"topic_distribution_entropy": self._calculate_cluster_entropy(
cluster_labels
)
if n_clusters > 1
else 0,
"average_cluster_size": len(texts) / n_clusters
if n_clusters > 0
else len(texts),
"topic_distribution_entropy": (
self._calculate_cluster_entropy(cluster_labels)
if n_clusters > 1
else 0
),
"average_cluster_size": (
len(texts) / n_clusters if n_clusters > 0 else len(texts)
),
}

except Exception as e:
Expand All @@ -303,9 +310,13 @@ def _analyze_style_diversity(self, conversations: pd.DataFrame) -> Dict[str, Any

style_metrics = []

for _, conv in conversations.iterrows():
text = conv["conversation_text"]

# ⚡ Bolt: Replace .iterrows() with zip to avoid slow pandas row generation during iteration
for conv_id, dataset, tier, text in zip(
conversations["conversation_id"],
conversations["dataset"],
conversations["tier"],
conversations["conversation_text"],
):
# Style indicators
question_density = (
text.count("?") / len(text) * 1000 if len(text) > 0 else 0
Expand All @@ -320,29 +331,30 @@ def _analyze_style_diversity(self, conversations: pd.DataFrame) -> Dict[str, Any
sentence_length_std = np.std(sentence_lengths) if sentence_lengths else 0

# Formality indicators
text_lower = text.lower()
formal_words = len(
re.findall(
r"\b(therefore|however|furthermore|consequently|nevertheless)\b",
text.lower(),
text_lower,
)
)
informal_words = len(
re.findall(r"\b(yeah|okay|cool|awesome|wow|hey)\b", text.lower())
re.findall(r"\b(yeah|okay|cool|awesome|wow|hey)\b", text_lower)
)

# Personal vs impersonal style
personal_pronouns = len(
re.findall(r"\b(I|you|we|my|your|our)\b", text.lower())
re.findall(r"\b(I|you|we|my|your|our)\b", text_lower)
)
Comment on lines +334 to 348
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, check if the file exists and get its structure
find . -type f -name "conversation_diversity_coverage_analyzer.py" | head -5

Repository: daggerstuff/ai

Length of output: 114


🏁 Script executed:

# Read the relevant section around lines 334-348
wc -l monitoring/conversation_diversity_coverage_analyzer.py

Repository: daggerstuff/ai

Length of output: 117


🏁 Script executed:

# Examine the specific code section
sed -n '334,348p' monitoring/conversation_diversity_coverage_analyzer.py

Repository: daggerstuff/ai

Length of output: 598


🏁 Script executed:

# Get more context - see the full function/method containing this code
sed -n '320,360p' monitoring/conversation_diversity_coverage_analyzer.py

Repository: daggerstuff/ai

Length of output: 1674


Fix uppercase I pattern on lowercased text in personal pronoun detection.

Line 334 lowercases the text, but line 347's regex still searches for uppercase I, which will never match. This drops all first-person singular pronouns from the personal_pronouns count and artificially deflates personal_style_score.

Proposed fix
            personal_pronouns = len(
-                re.findall(r"\b(I|you|we|my|your|our)\b", text_lower)
+                re.findall(r"\b(i|you|we|my|your|our)\b", text_lower)
            )
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@monitoring/conversation_diversity_coverage_analyzer.py` around lines 334 -
348, The personal pronoun regex is using uppercase "I" while the text has been
lowercased into text_lower, so first-person singular never matches; update the
pattern used to compute personal_pronouns (the re.findall call that assigns
personal_pronouns) to match lowercase "i" (or use a case-insensitive flag) and
keep the same word-boundary tokens (e.g., use "i" instead of "I" in the group or
pass re.IGNORECASE) so personal_pronouns and the derived personal_style_score
count correctly.

impersonal_indicators = len(
re.findall(r"\b(one|it|there|this|that)\b", text.lower())
re.findall(r"\b(one|it|there|this|that)\b", text_lower)
)

style_metrics.append(
{
"conversation_id": conv["conversation_id"],
"dataset": conv["dataset"],
"tier": conv["tier"],
"conversation_id": conv_id,
"dataset": dataset,
"tier": tier,
"question_density": question_density,
"exclamation_density": exclamation_density,
"sentence_length_variation": sentence_length_std,
Expand Down Expand Up @@ -402,10 +414,12 @@ def _analyze_response_pattern_diversity(
"interaction_patterns": {},
}

# Response length patterns
# ⚡ Bolt: Consolidate multiple .iterrows() passes into a single vectorized list iteration
length_categories = []
for _, conv in conversations.iterrows():
text_length = len(conv["conversation_text"])
structure_patterns = []

for text in conversations["conversation_text"]:
text_length = len(text)
if text_length < 100:
length_categories.append("short")
elif text_length < 500:
Expand All @@ -415,26 +429,6 @@ def _analyze_response_pattern_diversity(
else:
length_categories.append("very_long")

length_distribution = Counter(length_categories)
pattern_analysis["response_length_patterns"] = {
"distribution": dict(length_distribution),
"diversity_score": len(length_distribution) / 4 * 100, # Max 4 categories
}

# Dialogue turn patterns
turn_counts = conversations["turn_count"].tolist()
turn_distribution = Counter(turn_counts)
pattern_analysis["dialogue_turn_patterns"] = {
"distribution": dict(list(turn_distribution.most_common(10))),
"average_turns": np.mean(turn_counts),
"turn_diversity": len(turn_distribution),
}

# Response structure patterns
structure_patterns = []
for _, conv in conversations.iterrows():
text = conv["conversation_text"]

# Identify structure patterns
has_questions = "?" in text
has_lists = bool(re.search(r"\n\s*[-*β€’]\s+", text))
Expand All @@ -455,6 +449,21 @@ def _analyze_response_pattern_diversity(
"".join(pattern) if pattern else "P"
) # P for plain text

length_distribution = Counter(length_categories)
pattern_analysis["response_length_patterns"] = {
"distribution": dict(length_distribution),
"diversity_score": len(length_distribution) / 4 * 100, # Max 4 categories
}

# Dialogue turn patterns
turn_counts = conversations["turn_count"].tolist()
turn_distribution = Counter(turn_counts)
pattern_analysis["dialogue_turn_patterns"] = {
"distribution": dict(list(turn_distribution.most_common(10))),
"average_turns": np.mean(turn_counts),
"turn_diversity": len(turn_distribution),
}

structure_distribution = Counter(structure_patterns)
pattern_analysis["response_structure_patterns"] = {
"distribution": dict(structure_distribution.most_common(10)),
Expand All @@ -479,9 +488,11 @@ def _analyze_dataset_coverage(self, conversations: pd.DataFrame) -> Dict[str, An
"dataset_count": len(dataset_counts),
"largest_dataset": dataset_counts.index[0],
"smallest_dataset": dataset_counts.index[-1],
"size_ratio": dataset_counts.iloc[0] / dataset_counts.iloc[-1]
if len(dataset_counts) > 1
else 1,
"size_ratio": (
dataset_counts.iloc[0] / dataset_counts.iloc[-1]
if len(dataset_counts) > 1
else 1
),
}

# Tier coverage
Expand Down Expand Up @@ -687,11 +698,11 @@ def _analyze_diversity_trends(self, conversations: pd.DataFrame) -> Dict[str, An
"monthly_diversity_scores": {
str(k): v for k, v in monthly_diversity.to_dict().items()
},
"trend_direction": "increasing"
if monthly_diversity.iloc[-1] > monthly_diversity.iloc[0]
else "decreasing"
if len(monthly_diversity) > 1
else "stable",
"trend_direction": (
"increasing"
if monthly_diversity.iloc[-1] > monthly_diversity.iloc[0]
else "decreasing" if len(monthly_diversity) > 1 else "stable"
),
Comment on lines +701 to +705
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

cat -n monitoring/conversation_diversity_coverage_analyzer.py | sed -n '690,720p'

Repository: daggerstuff/ai

Length of output: 1455


🏁 Script executed:

cat -n monitoring/conversation_diversity_coverage_analyzer.py | sed -n '650,750p'

Repository: daggerstuff/ai

Length of output: 4710


🏁 Script executed:

find . -name "*.py" -type f -exec grep -l "trend_direction" {} \;

Repository: daggerstuff/ai

Length of output: 769


🏁 Script executed:

find . -name "*test*.py" -type f -exec grep -l "trend_direction\|_analyze_diversity_trends" {} \;

Repository: daggerstuff/ai

Length of output: 40


🏁 Script executed:

rg "trend_direction" --type py -B 2 -A 2

Repository: daggerstuff/ai

Length of output: 24155


Handle flat monthly diversity trends explicitly.

Lines 701-705 classify any non-increasing multi-month series as "decreasing". When the first and last monthly diversity scores are equal, the result should be "stable", not "decreasing".

πŸ› Proposed fix
                     "trend_direction": (
-                        "increasing"
-                        if monthly_diversity.iloc[-1] > monthly_diversity.iloc[0]
-                        else "decreasing" if len(monthly_diversity) > 1 else "stable"
+                        "stable"
+                        if len(monthly_diversity) <= 1
+                        else (
+                            "increasing"
+                            if monthly_diversity.iloc[-1] > monthly_diversity.iloc[0]
+                            else (
+                                "decreasing"
+                                if monthly_diversity.iloc[-1] < monthly_diversity.iloc[0]
+                                else "stable"
+                            )
+                        )
                     ),
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@monitoring/conversation_diversity_coverage_analyzer.py` around lines 701 -
705, The trend_direction assignment for monthly_diversity incorrectly labels a
flat multi-month series as "decreasing"; update the conditional in the block
that computes "trend_direction" (where monthly_diversity is used) to explicitly
check equality between monthly_diversity.iloc[-1] and monthly_diversity.iloc[0]
and return "stable" if they are equal, return "increasing" if last > first,
otherwise return "decreasing" (and keep the existing fallback to "stable" when
len(monthly_diversity) <= 1).

}
else:
trends_analysis["monthly_trends"] = {}
Expand Down Expand Up @@ -719,11 +730,11 @@ def _assess_coverage_completeness(
completeness_analysis["combination_coverage"] = {
"total_possible": total_possible_combinations,
"actual_combinations": actual_combinations,
"coverage_percentage": actual_combinations
/ total_possible_combinations
* 100
if total_possible_combinations > 0
else 0,
"coverage_percentage": (
actual_combinations / total_possible_combinations * 100
if total_possible_combinations > 0
else 0
),
}

# Content coverage assessment
Expand All @@ -745,9 +756,9 @@ def _assess_coverage_completeness(
category: {
"count": count,
"percentage": count / total_conversations * 100,
"adequacy": "good"
if count > total_conversations * 0.2
else "needs_improvement",
"adequacy": (
"good" if count > total_conversations * 0.2 else "needs_improvement"
),
}
for category, count in size_categories.items()
}
Expand Down