Skip to content

Commit c78a39a

Browse files
committed
Add stats script & update frontend
1 parent a28b8fd commit c78a39a

20 files changed

Lines changed: 132 additions & 59 deletions

File tree

app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ class SearchRequest(BaseModel):
297297
query: str = Field(..., max_length=2000)
298298
top_k: int = Field(10, gt=0, le=50)
299299
repo_group: Union[str, List[str]] = "all"
300-
type_filter: str = "all" # all, function, type, template_function, template_type
300+
type_filter: str = "all" # all, function, type, template
301301
language_filter: Union[str, List[str]] = "all"
302302

303303
# Mapping for language filtering based on file extensions

backend/generate_stats.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import sqlite3
2+
import os
3+
import re
4+
from collections import Counter
5+
6+
# Paths
# BASE_DIR is the directory containing this script; the SQLite database is
# looked up next to it, independent of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_PATH = os.path.join(BASE_DIR, "functions.db")
9+
10+
# Native DB entity types -> English report labels. Built once at import time
# instead of on every call, because the function is invoked once per row.
_CATEGORY_LABELS = {
    'function': "Functions",
    'type': "Types",
    'template': "Templates",
}


def categorize_type(entity_type, code):
    """Categorize an entity into an English label based on its native DB type.

    Args:
        entity_type: Value of the row's ``type`` column.
        code: Source text of the entity. Not used by the lookup; kept in the
            signature so existing callers can keep passing it.

    Returns:
        "Functions", "Types" or "Templates" for the three known types, and
        "Others" for anything else (including ``None``).
    """
    return _CATEGORY_LABELS.get(entity_type, "Others")
18+
19+
def get_file_extension(filepath):
    """Extract the file extension from a path.

    Args:
        filepath: Path of the file, or ``None`` when the database column is
            NULL.

    Returns:
        The extension including its leading dot (e.g. ".php"), as produced by
        ``os.path.splitext``; "no extension" when the path has none or when
        ``filepath`` is ``None``.
    """
    # os.path.splitext raises TypeError on None, so a single NULL filepath
    # would otherwise abort the whole statistics run.
    if filepath is None:
        return "no extension"
    _, ext = os.path.splitext(filepath)
    return ext or "no extension"
23+
24+
def _print_counter(title, counter):
    """Print one report section: a header line, then one row per counted key."""
    print(f"\n--- {title} ---")
    for label, count in counter.most_common():
        # "!s" stringifies the key first: a NULL (None) key read from the
        # database does not accept the "<20" format spec and would raise.
        print(f"{label!s:<20}: {count:>8}")


def generate_stats():
    """Print entity counts from the ``functions`` table of the SQLite database.

    Reads ``repo_group``, ``filepath``, ``type`` and ``code`` for every row of
    the database at ``DB_PATH`` and prints three tallies (by category, by
    repository group, by file extension) followed by the total row count.

    Prints an error message and returns without raising if ``DB_PATH`` does
    not exist. Returns ``None`` in all cases.
    """
    if not os.path.exists(DB_PATH):
        print(f"Error: {DB_PATH} not found.")
        return

    repo_group_stats = Counter()
    extension_stats = Counter()
    category_stats = Counter()
    total_entities = 0

    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        print("Fetching data from database...")
        cursor.execute("SELECT repo_group, filepath, type, code FROM functions")

        # Stream rows from the cursor rather than fetchall(): only counts are
        # needed, so there is no reason to hold every row's code in memory.
        for repo_group, filepath, entity_type, code in cursor:
            # Category
            category_stats[categorize_type(entity_type, code)] += 1

            # Repo Group
            repo_group_stats[repo_group] += 1

            # File Extension
            extension_stats[get_file_extension(filepath)] += 1

            total_entities += 1
    finally:
        # Release the connection even if the query or a row handler fails.
        conn.close()

    # Output Results
    print("\n" + "="*40)
    print(" MEDIAWIKI CODE ENTITY STATISTICS")
    print("="*40)

    _print_counter("Statistics by Category", category_stats)
    _print_counter("Statistics by Repository Group", repo_group_stats)
    _print_counter("Statistics by File Extension", extension_stats)

    print("\n" + "="*40)
    print(f" Total Entities: {total_entities:>18}")
    print("="*40)
74+
75+
if __name__ == "__main__":
    # Produce the report only when run as a script, not when imported.
    generate_stats()

frontend/i18n/as.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
"type_all": "সকলো",
1717
"type_function": "ফাংচন",
1818
"type_type": "প্ৰকাৰ (Types)",
19-
"type_template_function": "টেমপ্লেট ফাংচন",
20-
"type_template_type": "টেমপ্লেট প্ৰকাৰ",
2119
"fetching_code": "Software Heritage-ৰ পৰা ক’ড গোটোৱা হৈছে...",
2220
"error_fetching_code": "SWH আৰ্কাইভত এই শ্নীপেটটো নাই",
2321
"group_all": "সকলো",
@@ -38,5 +36,6 @@
3836
"sloan_grant_prefix": "এই গৱেষণাটো ",
3937
"sloan_grant_mid": " গ্ৰাণ্ট #",
4038
"sloan_grant_suffix": " ৰ দ্বাৰা পুঁজি প্ৰদান কৰা হৈছে (sloan.org)।",
41-
"issues": "ইছু ট্ৰেকাৰ"
39+
"issues": "ইছু ট্ৰেকাৰ",
40+
"type_template": "টেমপ্লেট (Templates)"
4241
}

frontend/i18n/bn.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
"type_all": "সব",
1717
"type_function": "ফাংশন",
1818
"type_type": "ধরণ (Types)",
19-
"type_template_function": "টেমপ্লেট ফাংশন",
20-
"type_template_type": "টেমপ্লেট ধরণ",
2119
"fetching_code": "Software Heritage থেকে কোড সংগ্রহ করা হচ্ছে...",
2220
"error_fetching_code": "SWH আর্কাইভে এই স্নিপেটটি নেই",
2321
"group_all": "সব",
@@ -38,5 +36,6 @@
3836
"sloan_grant_prefix": "এই গবেষণাটি ",
3937
"sloan_grant_mid": " দ্বারা অনুদান #",
4038
"sloan_grant_suffix": " এর সাথে অর্থায়ন করা হয়েছে (sloan.org)।",
41-
"issues": "ইস্যু ট্র্যাকার"
39+
"issues": "ইস্যু ট্র্যাকার",
40+
"type_template": "টেমপ্লেট"
4241
}

frontend/i18n/en.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212
"type_all": "All Entities",
1313
"type_function": "Functions",
1414
"type_type": "Types",
15-
"type_template_function": "Template Functions",
16-
"type_template_type": "Template Types",
15+
"type_template": "Templates",
1716
"fetching_code": "Fetching code from Software Heritage...",
1817
"error_fetching_code": "Snippet unavailable in SWH Archive",
1918
"group_all": "Everything",

frontend/i18n/fr.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
"type_all": "Toutes les entités",
1313
"type_function": "Fonctions",
1414
"type_type": "Types",
15-
"type_template_function": "Fonctions Template",
16-
"type_template_type": "Types Template",
1715
"fetching_code": "Récupération du code depuis Software Heritage...",
1816
"error_fetching_code": "Snippet non disponible dans l'archive SWH",
1917
"group_all": "Tout",
@@ -40,5 +38,6 @@
4038
"view_swhid": "Voir SWHID",
4139
"created_by": "Créé par :",
4240
"licence": "Licence :",
43-
"issues": "Suivi des problèmes"
41+
"issues": "Suivi des problèmes",
42+
"type_template": "Modèles"
4443
}

frontend/i18n/gu.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
"type_all": "બધું",
1717
"type_function": "ફંકશન્સ",
1818
"type_type": "પ્રકાર (Types)",
19-
"type_template_function": "ટેમ્પલેટ ફંકશન્સ",
20-
"type_template_type": "ટેમ્પલેટ પ્રકાર",
2119
"fetching_code": "Software Heritage પરથી કોડ મેળવી રહ્યા છીએ...",
2220
"error_fetching_code": "SWH આર્કાઇવમાં સ્નિપેટ ઉપલબ્ધ નથી",
2321
"group_all": "બધું",
@@ -38,5 +36,6 @@
3836
"sloan_grant_prefix": "આ અભ્યાસ માટે ",
3937
"sloan_grant_mid": " ગ્રાન્ટ #",
4038
"sloan_grant_suffix": " દ્વારા ભંડોળ ਪૂરું પાડવામાં આવ્યું છે (sloan.org).",
41-
"issues": "ઇશ્યૂ ટ્રેકર"
39+
"issues": "ઇશ્યૂ ટ્રેકર",
40+
"type_template": "ટેમ્પલેટ્સ"
4241
}

frontend/i18n/hi.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
"type_all": "सभी",
1313
"type_function": "फंक्शंस",
1414
"type_type": "प्रकार (Types)",
15-
"type_template_function": "टेम्प्लेट फंक्शंस",
16-
"type_template_type": "टेम्प्लेट प्रकार",
1715
"fetching_code": "Software Heritage से कोड प्राप्त किया जा रहा है...",
1816
"error_fetching_code": "SWH आर्काइव में स्निपेट अनुपलब्ध है",
1917
"group_all": "सभी",
@@ -38,5 +36,6 @@
3836
"view_swhid": "SWHID देखें",
3937
"created_by": "द्वारा बनाया गया:",
4038
"licence": "लाइसेंस:",
41-
"issues": "समस्या ट्रैकर"
39+
"issues": "समस्या ट्रैकर",
40+
"type_template": "टेम्प्लेट"
4241
}

frontend/i18n/it.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212
"type_all": "Tutte le entità",
1313
"type_function": "Funzioni",
1414
"type_type": "Tipi",
15-
"type_template_function": "Funzioni Template",
16-
"type_template_type": "Tipi Template",
15+
"type_template": "Template",
1716
"fetching_code": "Recupero codice da Software Heritage...",
1817
"error_fetching_code": "Snippet non disponibile nell'archivio SWH",
1918
"group_all": "Tutto",

frontend/i18n/kn.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
"type_all": "ಎಲ್ಲಾ",
1717
"type_function": "ಫಂಕ್ಷನ್‌ಗಳು",
1818
"type_type": "ವಿಧಗಳು (Types)",
19-
"type_template_function": "ಟೆಂಪ್ಲೇಟ್ ಫಂಕ್ಷನ್‌ಗಳು",
20-
"type_template_type": "ಟೆಂಪ್ಲೇಟ್ ವಿಧಗಳು",
2119
"fetching_code": "Software Heritage ನಿಂದ ಕೋಡ್ ಪಡೆಯಲಾಗುತ್ತಿದೆ...",
2220
"error_fetching_code": "SWH ಆರ್ಕೈವ್‌ನಲ್ಲಿ ಸ್ನಿಪ್ಪೆಟ್ ಲಭ್ಯವಿಲ್ಲ",
2321
"group_all": "ಎಲ್ಲಾ",
@@ -38,5 +36,6 @@
3836
"sloan_grant_prefix": "ಈ ಅಧ್ಯಯನಕ್ಕೆ ",
3937
"sloan_grant_mid": " ಅನುದಾನ #",
4038
"sloan_grant_suffix": " ಮೂಲಕ ಧನಸಹായ ನೀಡಲಾಗಿದೆ (sloan.org).",
41-
"issues": "ಸಮಸ್ಯೆ ಟ್ರ್ಯಾಕರ್"
39+
"issues": "ಸಮಸ್ಯೆ ಟ್ರ್ಯಾಕರ್",
40+
"type_template": "ಟೆಂಪ್ಲೇಟ್‌ಗಳು"
4241
}

0 commit comments

Comments
 (0)