From d12d51d08eaf52e7c88483039f6282d55dafc88b Mon Sep 17 00:00:00 2001
From: Sanchit Bansal <sanchitbansal.06@gmail.com>
Date: Fri, 4 Sep 2020 19:10:35 +0530
Subject: [PATCH 1/5] Untranslated text discrepancy to be highlighted in blue

---
 .../CommcareTranslationChecker.py             | 79 ++++++++++++++++++-
 1 file changed, 75 insertions(+), 4 deletions(-)
diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py
index 982fe3a..0cf5098 100644
--- a/CommcareTranslationChecker/CommcareTranslationChecker.py
+++ b/CommcareTranslationChecker/CommcareTranslationChecker.py
@@ -19,10 +19,12 @@
 NON_LINGUISTIC_CHARACTERS = "~`!@#$%^&*()_-+={[}]|\\:;\"'<,>.?/"
 MISMATCH_FILL_STYLE_NAME = "mismatchFillStyle"
 LESSER_MISMATCH_FILL_STYLE_NAME = "lesserMismatchFillStyle"
+LANG_MISMATCH_FILL_STYLE_NAME = "langMismatchFillStyle"
 
 # DEFINE COLORS
 RED = '00FF0000'
 YELLOW = '00FFFF00'
+BLUE = '000000FF'
 
 
 # DEFINE METHODS #
@@ -95,10 +97,16 @@ def register_styles(wb):
         name=LESSER_MISMATCH_FILL_STYLE_NAME,
         fill=xl.styles.PatternFill(fgColor=xl.styles.colors.Color(YELLOW), fill_type="solid"),
         alignment=xl.styles.Alignment(wrap_text=True))
+    langMismatchFillStyle = xl.styles.NamedStyle(
+        name=LANG_MISMATCH_FILL_STYLE_NAME,
+        fill=xl.styles.PatternFill(fgColor=xl.styles.colors.Color(BLUE), fill_type="solid"),
+        alignment=xl.styles.Alignment(wrap_text=True))
     if MISMATCH_FILL_STYLE_NAME not in wb.named_styles:
         wb.add_named_style(mismatchFillStyle)
     if LESSER_MISMATCH_FILL_STYLE_NAME not in wb.named_styles:
         wb.add_named_style(lesserMismatchFillStyle)
+    if LANG_MISMATCH_FILL_STYLE_NAME not in wb.named_styles:
+        wb.add_named_style(langMismatchFillStyle)
 
 
 def convertCellToOutputValueList(cell):
@@ -140,6 +148,59 @@ def convertCellToOutputValueList(cell):
     return outputList, messages
 
 
+def convertCellToDict(cell):
+    """
+    Convert an Excel cell to a dict keyed by its distinct words (each mapped to 1). <output value...> tags are ignored.
+    If the Excel cell contains 'jr://file/', an empty dict is returned.
+    Input:
+    cell (xl.cell.cell.Cell): Cell whose contents are to be parsed
+
+    Output:
+    Dict with strings as key and occurence as value
+
+   """
+    outputDict = {}
+    x = cell.value
+    if'jr://file/' in x:
+        return outputDict
+
+    try:
+        x = re.sub(r'(?:\s)<output[^, ]*', '', x)
+        x = re.sub(r'(?:\s)value=[^, ]*', '', x)
+    except Exception as e:
+        raise FatalError("FATAL ERROR determining string values for worksheet %s cell %s : %s" %
+                         (cell.parent.title, cell.coordinate, str(e)))
+    
+    for i in x.split():
+        i = re.sub(r'[\W\_]','',i)
+        if not outputDict.get(i) and (i and i.strip()) and not (re.match(r'^[0-9]*$', i)):
+            outputDict[i] = 1
+
+    return outputDict
+
+def linguisticCharChecker(baseDict, colDict):
+    """
+    Convert an Excel cell to a dict of strings with occurence. <output value...> tags are ignored. 
+    If the Excel cell contains 'jr://file/' empty dict is returned.
+    Input:
+    cell (xl.cell.cell.Cell): Cell whose contents are to be parsed
+
+    Output:
+    Dict with strings as key and occurence as value
+
+   """
+    sharedWords = list(set(baseDict).intersection(colDict))
+    if not sharedWords:
+        return sharedWords, False
+    for i in list(colDict.keys()):
+        if i in sharedWords:
+            continue
+        else:
+            if re.match(r"^[A-Za-z0-9]*$",i):
+                return sharedWords, False
+            else:
+                return sharedWords, True
+
 def createOutputCell(cell, wsOut):
     '''
     Make a copy of a Cell object into the exact same coordinates in the target Worksheet.
@@ -268,6 +329,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
     if baseColumnIdx is None:
         baseColumnIdx = sorted(columnDictKeyList)[0]
     baseOutputValueList, error_messages = convertCellToOutputValueList(row[baseColumnIdx])
+    baseValueDict = convertCellToDict(row[baseColumnIdx])
     messages.extend(error_messages)
     if ignoreOrder:
         baseOutputValueList = sorted(baseOutputValueList)
@@ -285,6 +347,9 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
             if ignoreOrder:
                 curOutputValueList = sorted(curOutputValueList)
             curFormatDict = {}
+            if (colIdx != baseColumnIdx):    
+                curValueDict = convertCellToDict(row[colIdx])
+                sharedWords, bool_translation = linguisticCharChecker(baseValueDict, curValueDict)
 
             # Initialize block_tags_fixed_flag to False, if any fix is applied, set to True
             block_tags_fixed_flag = False
@@ -368,7 +433,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
 
                 if len(mismatchTypes) > 0:
                     mismatchDict[colIdx] = (curOutputValueList, mismatchTypes)
-
+              
                 if wsOut:
                     cellOut = getOutputCell(row[colIdx], wsOut)
                     # If output value mismatch is present, style the cell with MISMATCH_FILL_STYLE
@@ -384,7 +449,10 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
                         mismatchTypesCellOut = wsOut.rows[getOutputCell(row[0], wsOut).row-1][mismatchTypesColIdx]
                         mismatchTypesCellOut.value = ",".join(mismatchTypes)
                         mismatchTypesCellOut.style = curMismatchFillStyle
-
+                    if bool_translation and sharedWords:
+                        translationMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME
+                        cellOut.style = translationMismatchFillStyle
+                    
                 if not block_tags_fixed_flag:
                     outputText = row[colIdx].value
                 # If there are any extra output values remove them
@@ -422,6 +490,9 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
                         if baseOutputValueList != curOutputValueList:
                             currFixedCell.style = MISMATCH_FILL_STYLE_NAME
 
+                if bool_translation and sharedWords:
+                    translationMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME
+                    cellOut.style = translationMismatchFillStyle
         except AttributeError as e:
             messages.append(str(e))
         except Exception as e:
@@ -570,7 +641,7 @@ def validate_workbook(file_obj, args=None):
 
             # Dictionaries mapping column index to column name
             defaultColumnDict = {}
-
+            
             maxHeaderIdx = 0
             # Find all columns of format "default_[CODE]"
             ws_rows = list(ws.rows)
@@ -603,7 +674,7 @@ def validate_workbook(file_obj, args=None):
                         for colIdx in defaultColumnDict.keys():
                             if defaultColumnDict[colIdx] == baseColumn:
                                 baseColumnIdx = colIdx
-
+                    
                     # Check row for mismatch and print results
                     rowCheckResults = checkRowForMismatch(
                         row, defaultColumnDict, fixedColumnDict, baseColumnIdx, ignoreOrder, wsOut, mismatchFlagIdx,

From bf9bae3469f7df8fb02815b613e13332af842a02 Mon Sep 17 00:00:00 2001
From: Sanchit Bansal <sanchitbansal.06@gmail.com>
Date: Fri, 4 Sep 2020 20:18:16 +0530
Subject: [PATCH 2/5] Fixed mismatch column to be highlighted in Blue

---
 .../CommcareTranslationChecker.py             | 30 +++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py
index 0cf5098..5a82f1b 100644
--- a/CommcareTranslationChecker/CommcareTranslationChecker.py
+++ b/CommcareTranslationChecker/CommcareTranslationChecker.py
@@ -347,9 +347,15 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
             if ignoreOrder:
                 curOutputValueList = sorted(curOutputValueList)
             curFormatDict = {}
-            if (colIdx != baseColumnIdx):    
+            sharedWords =[]
+            if (colIdx != baseColumnIdx):
                 curValueDict = convertCellToDict(row[colIdx])
                 sharedWords, bool_translation = linguisticCharChecker(baseValueDict, curValueDict)
+                if wsOut:
+                    cellOut = getOutputCell(row[colIdx], wsOut)
+                    if len(sharedWords) > 0 and bool_translation:
+                        curMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME
+                        cellOut.style = curMismatchFillStyle
 
             # Initialize block_tags_fixed_flag to False, if any fix is applied, set to True
             block_tags_fixed_flag = False
@@ -433,7 +439,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
 
                 if len(mismatchTypes) > 0:
                     mismatchDict[colIdx] = (curOutputValueList, mismatchTypes)
-              
+
                 if wsOut:
                     cellOut = getOutputCell(row[colIdx], wsOut)
                     # If output value mismatch is present, style the cell with MISMATCH_FILL_STYLE
@@ -449,10 +455,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
                         mismatchTypesCellOut = wsOut.rows[getOutputCell(row[0], wsOut).row-1][mismatchTypesColIdx]
                         mismatchTypesCellOut.value = ",".join(mismatchTypes)
                         mismatchTypesCellOut.style = curMismatchFillStyle
-                    if bool_translation and sharedWords:
-                        translationMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME
-                        cellOut.style = translationMismatchFillStyle
-                    
+
                 if not block_tags_fixed_flag:
                     outputText = row[colIdx].value
                 # If there are any extra output values remove them
@@ -490,9 +493,6 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
                         if baseOutputValueList != curOutputValueList:
                             currFixedCell.style = MISMATCH_FILL_STYLE_NAME
 
-                if bool_translation and sharedWords:
-                    translationMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME
-                    cellOut.style = translationMismatchFillStyle
         except AttributeError as e:
             messages.append(str(e))
         except Exception as e:
@@ -502,6 +502,11 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
                              (row[colIdx].parent.title, row[colIdx].coordinate, str(e)))
 
     mismatchCell = wsOut.cell(row=getOutputCell(row[0], wsOut).row, column=1).offset(column=mismatchFlagIdx)
+    if len(sharedWords) > 0:
+        if bool_translation:
+            curMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME
+            mismatchCell.value = "Y"
+            mismatchCell.style = curMismatchFillStyle
     if len(mismatchDict) > 0:
         curMismatchFillStyle = LESSER_MISMATCH_FILL_STYLE_NAME
         for key in mismatchDict:
@@ -510,7 +515,8 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
         mismatchCell.value = "Y"
         mismatchCell.style = curMismatchFillStyle
     else:
-        mismatchCell.value = "N"
+        if(mismatchCell.value!="Y"):
+            mismatchCell.value = "N"
 
     return baseColumnDict, mismatchDict
 
@@ -641,7 +647,7 @@ def validate_workbook(file_obj, args=None):
 
             # Dictionaries mapping column index to column name
             defaultColumnDict = {}
-            
+
             maxHeaderIdx = 0
             # Find all columns of format "default_[CODE]"
             ws_rows = list(ws.rows)
@@ -674,7 +680,7 @@ def validate_workbook(file_obj, args=None):
                         for colIdx in defaultColumnDict.keys():
                             if defaultColumnDict[colIdx] == baseColumn:
                                 baseColumnIdx = colIdx
-                    
+
                     # Check row for mismatch and print results
                     rowCheckResults = checkRowForMismatch(
                         row, defaultColumnDict, fixedColumnDict, baseColumnIdx, ignoreOrder, wsOut, mismatchFlagIdx,

From d618f275498afead3232a213aeefb4d00568fbb2 Mon Sep 17 00:00:00 2001
From: Sanchit Bansal <sanchitbansal.06@gmail.com>
Date: Fri, 4 Sep 2020 22:46:47 +0530
Subject: [PATCH 3/5] Updated comments

---
 CommcareTranslationChecker/CommcareTranslationChecker.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py
index 5a82f1b..161a236 100644
--- a/CommcareTranslationChecker/CommcareTranslationChecker.py
+++ b/CommcareTranslationChecker/CommcareTranslationChecker.py
@@ -180,13 +180,8 @@ def convertCellToDict(cell):
 
 def linguisticCharChecker(baseDict, colDict):
     """
-    Convert an Excel cell to a dict of strings with occurence. <output value...> tags are ignored. 
-    If the Excel cell contains 'jr://file/' empty dict is returned.
-    Input:
-    cell (xl.cell.cell.Cell): Cell whose contents are to be parsed
-
-    Output:
-    Dict with strings as key and occurence as value
+    Compare the base column's word dictionary with the current column's word dictionary.
+    Returns (shared words, flag); flag is True when words are shared and the first unshared current-column word has characters outside A-Za-z0-9.
 
    """
     sharedWords = list(set(baseDict).intersection(colDict))

From bbb0cf75cee8a3ae8887b413cb72b3231f741f67 Mon Sep 17 00:00:00 2001
From: Sanchit Bansal <sanchitbansal.06@gmail.com>
Date: Tue, 22 Sep 2020 00:01:11 +0530
Subject: [PATCH 4/5] updated general formatting

---
 CommcareTranslationChecker/CommcareTranslationChecker.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py
index 161a236..281407c 100644
--- a/CommcareTranslationChecker/CommcareTranslationChecker.py
+++ b/CommcareTranslationChecker/CommcareTranslationChecker.py
@@ -156,12 +156,12 @@ def convertCellToDict(cell):
     cell (xl.cell.cell.Cell): Cell whose contents are to be parsed
 
     Output:
-    Dict with strings as key and occurence as value
+    Dict with strings as key and value as 1. All keys are unique in the dict.
 
    """
     outputDict = {}
     x = cell.value
-    if'jr://file/' in x:
+    if 'jr://file/' in x:
         return outputDict
 
     try:
@@ -510,7 +510,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
         mismatchCell.value = "Y"
         mismatchCell.style = curMismatchFillStyle
     else:
-        if(mismatchCell.value!="Y"):
+        if mismatchCell.value!="Y":
             mismatchCell.value = "N"
 
     return baseColumnDict, mismatchDict

From 3eaf3d22983e893a90c5b2544d0b385f2943025c Mon Sep 17 00:00:00 2001
From: Sanchit Bansal <sanchitbansal.06@gmail.com>
Date: Wed, 23 Sep 2020 17:14:18 +0530
Subject: [PATCH 5/5] Added appropriate spacing

---
 CommcareTranslationChecker/CommcareTranslationChecker.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py
index 281407c..b742584 100644
--- a/CommcareTranslationChecker/CommcareTranslationChecker.py
+++ b/CommcareTranslationChecker/CommcareTranslationChecker.py
@@ -172,7 +172,7 @@ def convertCellToDict(cell):
                          (cell.parent.title, cell.coordinate, str(e)))
     
     for i in x.split():
-        i = re.sub(r'[\W\_]','',i)
+        i = re.sub(r'[\W\_]', '', i)
         if not outputDict.get(i) and (i and i.strip()) and not (re.match(r'^[0-9]*$', i)):
             outputDict[i] = 1
 
@@ -191,7 +191,7 @@ def linguisticCharChecker(baseDict, colDict):
         if i in sharedWords:
             continue
         else:
-            if re.match(r"^[A-Za-z0-9]*$",i):
+            if re.match(r"^[A-Za-z0-9]*$", i):
                 return sharedWords, False
             else:
                 return sharedWords, True
@@ -510,7 +510,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig
         mismatchCell.value = "Y"
         mismatchCell.style = curMismatchFillStyle
     else:
-        if mismatchCell.value!="Y":
+        if mismatchCell.value != "Y":
             mismatchCell.value = "N"
 
     return baseColumnDict, mismatchDict