From d12d51d08eaf52e7c88483039f6282d55dafc88b Mon Sep 17 00:00:00 2001 From: Sanchit Bansal Date: Fri, 4 Sep 2020 19:10:35 +0530 Subject: [PATCH 1/5] Untranslated text discrepancy to be highlighted in blue --- .../CommcareTranslationChecker.py | 79 ++++++++++++++++++- 1 file changed, 75 insertions(+), 4 deletions(-) diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py index 982fe3a..0cf5098 100644 --- a/CommcareTranslationChecker/CommcareTranslationChecker.py +++ b/CommcareTranslationChecker/CommcareTranslationChecker.py @@ -19,10 +19,12 @@ NON_LINGUISTIC_CHARACTERS = "~`!@#$%^&*()_-+={[}]|\\:;\"'<,>.?/" MISMATCH_FILL_STYLE_NAME = "mismatchFillStyle" LESSER_MISMATCH_FILL_STYLE_NAME = "lesserMismatchFillStyle" +LANG_MISMATCH_FILL_STYLE_NAME = "langMismatchFillStyle" # DEFINE COLORS RED = '00FF0000' YELLOW = '00FFFF00' +BLUE = '000000FF' # DEFINE METHODS # @@ -95,10 +97,16 @@ def register_styles(wb): name=LESSER_MISMATCH_FILL_STYLE_NAME, fill=xl.styles.PatternFill(fgColor=xl.styles.colors.Color(YELLOW), fill_type="solid"), alignment=xl.styles.Alignment(wrap_text=True)) + langMismatchFillStyle = xl.styles.NamedStyle( + name=LANG_MISMATCH_FILL_STYLE_NAME, + fill=xl.styles.PatternFill(fgColor=xl.styles.colors.Color(BLUE), fill_type="solid"), + alignment=xl.styles.Alignment(wrap_text=True)) if MISMATCH_FILL_STYLE_NAME not in wb.named_styles: wb.add_named_style(mismatchFillStyle) if LESSER_MISMATCH_FILL_STYLE_NAME not in wb.named_styles: wb.add_named_style(lesserMismatchFillStyle) + if LANG_MISMATCH_FILL_STYLE_NAME not in wb.named_styles: + wb.add_named_style(langMismatchFillStyle) def convertCellToOutputValueList(cell): @@ -140,6 +148,59 @@ def convertCellToOutputValueList(cell): return outputList, messages +def convertCellToDict(cell): + """ + Convert an Excel cell to a dict of strings with occurence. tags are ignored. + If the Excel cell contains 'jr://file/' empty dict is returned. + Input: + cell (xl.cell.cell.Cell): Cell whose contents are to be parsed + + Output: + Dict with strings as key and occurence as value + + """ + outputDict = {} + x = cell.value + if'jr://file/' in x: + return outputDict + + try: + x = re.sub(r'(?:\s) tags are ignored. + If the Excel cell contains 'jr://file/' empty dict is returned. + Input: + cell (xl.cell.cell.Cell): Cell whose contents are to be parsed + + Output: + Dict with strings as key and occurence as value + + """ + sharedWords = list(set(baseDict).intersection(colDict)) + if not sharedWords: + return sharedWords, False + for i in list(colDict.keys()): + if i in sharedWords: + continue + else: + if re.match(r"^[A-Za-z0-9]*$",i): + return sharedWords, False + else: + return sharedWords, True + def createOutputCell(cell, wsOut): ''' Make a copy of a Cell object into the exact same coordinates in the target Worksheet. @@ -268,6 +329,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig if baseColumnIdx is None: baseColumnIdx = sorted(columnDictKeyList)[0] baseOutputValueList, error_messages = convertCellToOutputValueList(row[baseColumnIdx]) + baseValueDict = convertCellToDict(row[baseColumnIdx]) messages.extend(error_messages) if ignoreOrder: baseOutputValueList = sorted(baseOutputValueList) @@ -285,6 +347,9 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig if ignoreOrder: curOutputValueList = sorted(curOutputValueList) curFormatDict = {} + if (colIdx != baseColumnIdx): + curValueDict = convertCellToDict(row[colIdx]) + sharedWords, bool_translation = linguisticCharChecker(baseValueDict, curValueDict) # Initialize block_tags_fixed_flag to False, if any fix is applied, set to True block_tags_fixed_flag = False @@ -368,7 +433,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig if len(mismatchTypes) > 0: mismatchDict[colIdx] = (curOutputValueList, mismatchTypes) - + if wsOut: cellOut = getOutputCell(row[colIdx], wsOut) # If output value mismatch is present, style the cell with MISMATCH_FILL_STYLE @@ -384,7 +449,10 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig mismatchTypesCellOut = wsOut.rows[getOutputCell(row[0], wsOut).row-1][mismatchTypesColIdx] mismatchTypesCellOut.value = ",".join(mismatchTypes) mismatchTypesCellOut.style = curMismatchFillStyle - + if bool_translation and sharedWords: + translationMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME + cellOut.style = translationMismatchFillStyle + if not block_tags_fixed_flag: outputText = row[colIdx].value # If there are any extra output values remove them @@ -422,6 +490,9 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig if baseOutputValueList != curOutputValueList: currFixedCell.style = MISMATCH_FILL_STYLE_NAME + if bool_translation and sharedWords: + translationMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME + cellOut.style = translationMismatchFillStyle except AttributeError as e: messages.append(str(e)) except Exception as e: @@ -570,7 +641,7 @@ def validate_workbook(file_obj, args=None): # Dictionaries mapping column index to column name defaultColumnDict = {} - + maxHeaderIdx = 0 # Find all columns of format "default_[CODE]" ws_rows = list(ws.rows) @@ -603,7 +674,7 @@ def validate_workbook(file_obj, args=None): for colIdx in defaultColumnDict.keys(): if defaultColumnDict[colIdx] == baseColumn: baseColumnIdx = colIdx - + # Check row for mismatch and print results rowCheckResults = checkRowForMismatch( row, defaultColumnDict, fixedColumnDict, baseColumnIdx, ignoreOrder, wsOut, mismatchFlagIdx, From bf9bae3469f7df8fb02815b613e13332af842a02 Mon Sep 17 00:00:00 2001 From: Sanchit Bansal Date: Fri, 4 Sep 2020 20:18:16 +0530 Subject: [PATCH 2/5] Fixed mismatch column to be highlighted in Blue --- .../CommcareTranslationChecker.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py index 0cf5098..5a82f1b 100644 --- a/CommcareTranslationChecker/CommcareTranslationChecker.py +++ b/CommcareTranslationChecker/CommcareTranslationChecker.py @@ -347,9 +347,15 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig if ignoreOrder: curOutputValueList = sorted(curOutputValueList) curFormatDict = {} - if (colIdx != baseColumnIdx): + sharedWords =[] + if (colIdx != baseColumnIdx): curValueDict = convertCellToDict(row[colIdx]) sharedWords, bool_translation = linguisticCharChecker(baseValueDict, curValueDict) + if wsOut: + cellOut = getOutputCell(row[colIdx], wsOut) + if len(sharedWords) > 0 and bool_translation: + curMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME + cellOut.style = curMismatchFillStyle # Initialize block_tags_fixed_flag to False, if any fix is applied, set to True block_tags_fixed_flag = False @@ -433,7 +439,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig if len(mismatchTypes) > 0: mismatchDict[colIdx] = (curOutputValueList, mismatchTypes) - + if wsOut: cellOut = getOutputCell(row[colIdx], wsOut) # If output value mismatch is present, style the cell with MISMATCH_FILL_STYLE @@ -449,10 +455,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig mismatchTypesCellOut = wsOut.rows[getOutputCell(row[0], wsOut).row-1][mismatchTypesColIdx] mismatchTypesCellOut.value = ",".join(mismatchTypes) mismatchTypesCellOut.style = curMismatchFillStyle - if bool_translation and sharedWords: - translationMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME - cellOut.style = translationMismatchFillStyle - + if not block_tags_fixed_flag: outputText = row[colIdx].value # If there are any extra output values remove them @@ -490,9 +493,6 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig if baseOutputValueList != curOutputValueList: currFixedCell.style = MISMATCH_FILL_STYLE_NAME - if bool_translation and sharedWords: - translationMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME - cellOut.style = translationMismatchFillStyle except AttributeError as e: messages.append(str(e)) except Exception as e: @@ -502,6 +502,11 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig (row[colIdx].parent.title, row[colIdx].coordinate, str(e))) mismatchCell = wsOut.cell(row=getOutputCell(row[0], wsOut).row, column=1).offset(column=mismatchFlagIdx) + if len(sharedWords) > 0: + if bool_translation: + curMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME + mismatchCell.value = "Y" + mismatchCell.style = curMismatchFillStyle if len(mismatchDict) > 0: curMismatchFillStyle = LESSER_MISMATCH_FILL_STYLE_NAME for key in mismatchDict: @@ -510,7 +515,8 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig mismatchCell.value = "Y" mismatchCell.style = curMismatchFillStyle else: - mismatchCell.value = "N" + if(mismatchCell.value!="Y"): + mismatchCell.value = "N" return baseColumnDict, mismatchDict @@ -641,7 +647,7 @@ def validate_workbook(file_obj, args=None): # Dictionaries mapping column index to column name defaultColumnDict = {} - + maxHeaderIdx = 0 # Find all columns of format "default_[CODE]" ws_rows = list(ws.rows) @@ -674,7 +680,7 @@ def validate_workbook(file_obj, args=None): for colIdx in defaultColumnDict.keys(): if defaultColumnDict[colIdx] == baseColumn: baseColumnIdx = colIdx - + # Check row for mismatch and print results rowCheckResults = checkRowForMismatch( row, defaultColumnDict, fixedColumnDict, baseColumnIdx, ignoreOrder, wsOut, mismatchFlagIdx, From d618f275498afead3232a213aeefb4d00568fbb2 Mon Sep 17 00:00:00 2001 From: Sanchit Bansal Date: Fri, 4 Sep 2020 22:46:47 +0530 Subject: [PATCH 3/5] Updated comments --- CommcareTranslationChecker/CommcareTranslationChecker.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py index 5a82f1b..161a236 100644 --- a/CommcareTranslationChecker/CommcareTranslationChecker.py +++ b/CommcareTranslationChecker/CommcareTranslationChecker.py @@ -180,13 +180,8 @@ def convertCellToDict(cell): def linguisticCharChecker(baseDict, colDict): """ - Convert an Excel cell to a dict of strings with occurence. tags are ignored. - If the Excel cell contains 'jr://file/' empty dict is returned. - Input: - cell (xl.cell.cell.Cell): Cell whose contents are to be parsed - - Output: - Dict with strings as key and occurence as value + takes base column word dictionary and current column word dictionary as input. + Returns list of common words and whether the script is english in both the columns or not. """ sharedWords = list(set(baseDict).intersection(colDict)) From bbb0cf75cee8a3ae8887b413cb72b3231f741f67 Mon Sep 17 00:00:00 2001 From: Sanchit Bansal Date: Tue, 22 Sep 2020 00:01:11 +0530 Subject: [PATCH 4/5] updated general formatting --- CommcareTranslationChecker/CommcareTranslationChecker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py index 161a236..281407c 100644 --- a/CommcareTranslationChecker/CommcareTranslationChecker.py +++ b/CommcareTranslationChecker/CommcareTranslationChecker.py @@ -156,12 +156,12 @@ def convertCellToDict(cell): cell (xl.cell.cell.Cell): Cell whose contents are to be parsed Output: - Dict with strings as key and occurence as value + Dict with strings as key and value as 1. All keys are unique in the dict. """ outputDict = {} x = cell.value - if'jr://file/' in x: + if 'jr://file/' in x: return outputDict try: @@ -510,7 +510,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig mismatchCell.value = "Y" mismatchCell.style = curMismatchFillStyle else: - if(mismatchCell.value!="Y"): + if mismatchCell.value!="Y": mismatchCell.value = "N" return baseColumnDict, mismatchDict From 3eaf3d22983e893a90c5b2544d0b385f2943025c Mon Sep 17 00:00:00 2001 From: Sanchit Bansal Date: Wed, 23 Sep 2020 17:14:18 +0530 Subject: [PATCH 5/5] Added appropriate spacing --- CommcareTranslationChecker/CommcareTranslationChecker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CommcareTranslationChecker/CommcareTranslationChecker.py b/CommcareTranslationChecker/CommcareTranslationChecker.py index 281407c..b742584 100644 --- a/CommcareTranslationChecker/CommcareTranslationChecker.py +++ b/CommcareTranslationChecker/CommcareTranslationChecker.py @@ -172,7 +172,7 @@ def convertCellToDict(cell): (cell.parent.title, cell.coordinate, str(e))) for i in x.split(): - i = re.sub(r'[\W\_]','',i) + i = re.sub(r'[\W\_]', '', i) if not outputDict.get(i) and (i and i.strip()) and not (re.match(r'^[0-9]*$', i)): outputDict[i] = 1 @@ -191,7 +191,7 @@ def linguisticCharChecker(baseDict, colDict): if i in sharedWords: continue else: - if re.match(r"^[A-Za-z0-9]*$",i): + if re.match(r"^[A-Za-z0-9]*$", i): return sharedWords, False else: return sharedWords, True @@ -510,7 +510,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig mismatchCell.value = "Y" mismatchCell.style = curMismatchFillStyle else: - if mismatchCell.value!="Y": + if mismatchCell.value != "Y": mismatchCell.value = "N" return baseColumnDict, mismatchDict