-
Notifications
You must be signed in to change notification settings - Fork 4
Untranslated text discrepancy to be highlighted in blue #26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d12d51d
bf9bae3
d618f27
bbb0cf7
3eaf3d2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,10 +19,12 @@ | |
| NON_LINGUISTIC_CHARACTERS = "~`!@#$%^&*()_-+={[}]|\\:;\"'<,>.?/" | ||
| MISMATCH_FILL_STYLE_NAME = "mismatchFillStyle" | ||
| LESSER_MISMATCH_FILL_STYLE_NAME = "lesserMismatchFillStyle" | ||
| LANG_MISMATCH_FILL_STYLE_NAME = "langMismatchFillStyle" | ||
|
|
||
| # DEFINE COLORS | ||
| RED = '00FF0000' | ||
| YELLOW = '00FFFF00' | ||
| BLUE = '000000FF' | ||
|
|
||
|
|
||
| # DEFINE METHODS # | ||
|
|
@@ -95,10 +97,16 @@ def register_styles(wb): | |
| name=LESSER_MISMATCH_FILL_STYLE_NAME, | ||
| fill=xl.styles.PatternFill(fgColor=xl.styles.colors.Color(YELLOW), fill_type="solid"), | ||
| alignment=xl.styles.Alignment(wrap_text=True)) | ||
| langMismatchFillStyle = xl.styles.NamedStyle( | ||
| name=LANG_MISMATCH_FILL_STYLE_NAME, | ||
| fill=xl.styles.PatternFill(fgColor=xl.styles.colors.Color(BLUE), fill_type="solid"), | ||
| alignment=xl.styles.Alignment(wrap_text=True)) | ||
| if MISMATCH_FILL_STYLE_NAME not in wb.named_styles: | ||
| wb.add_named_style(mismatchFillStyle) | ||
| if LESSER_MISMATCH_FILL_STYLE_NAME not in wb.named_styles: | ||
| wb.add_named_style(lesserMismatchFillStyle) | ||
| if LANG_MISMATCH_FILL_STYLE_NAME not in wb.named_styles: | ||
| wb.add_named_style(langMismatchFillStyle) | ||
|
|
||
|
|
||
| def convertCellToOutputValueList(cell): | ||
|
|
@@ -140,6 +148,54 @@ def convertCellToOutputValueList(cell): | |
| return outputList, messages | ||
|
|
||
|
|
||
def convertCellToDict(cell):
    """
    Convert an Excel cell to a dict of the unique words it contains.
    <output value...> tags are ignored.

    Input:
        cell (xl.cell.cell.Cell): Cell whose contents are to be parsed

    Output:
        Dict with each distinct word as key and 1 as value. Occurrences are
        not counted: a word that appears several times is stored once.
        An empty dict is returned when the cell holds no text (empty or
        non-string value) or when the text contains 'jr://file/'.

    Raises:
        FatalError: if the tag-stripping substitutions fail.
    """
    outputDict = {}
    text = cell.value

    # Empty cells carry None and numeric cells carry int/float; neither has
    # words to extract, and both would raise TypeError in the checks below.
    if not isinstance(text, str):
        return outputDict

    # Media references are not translatable text.
    if 'jr://file/' in text:
        return outputDict

    try:
        # Drop "<output" and "value=..." fragments. Both patterns require a
        # preceding whitespace character, so a tag at the very start of the
        # text is left in place.
        text = re.sub(r'(?:\s)<output[^, ]*', '', text)
        text = re.sub(r'(?:\s)value=[^, ]*', '', text)
    except Exception as e:
        raise FatalError("FATAL ERROR determining string values for worksheet %s cell %s : %s" %
                         (cell.parent.title, cell.coordinate, str(e)))

    for token in text.split():
        # Strip punctuation, symbols and underscores from the token.
        word = re.sub(r'[\W\_]', '', token)
        # Skip tokens that became empty, duplicates, and digit-only tokens.
        if word and word not in outputDict and not re.match(r'^[0-9]*$', word):
            outputDict[word] = 1

    return outputDict
|
|
||
def linguisticCharChecker(baseDict, colDict):
    """
    Compare the word dictionary of the base column with that of the current
    column.

    Input:
        baseDict (dict): unique words of the base column cell
        colDict (dict): unique words of the current column cell

    Output:
        Tuple (sharedWords, flag):
        sharedWords is the list of words present in both dictionaries.
        flag is True only when there is at least one shared word and the
        first word of colDict that is not shared contains characters outside
        A-Z, a-z, 0-9. It is False in every other case, including when there
        are no shared words or when every word of colDict is shared.
    """
    # Presence-only comparison: the keys are intersected as sets, so how
    # often a word occurs in either cell plays no part in the result.
    sharedWords = list(set(baseDict).intersection(colDict))

    if not sharedWords:
        return sharedWords, False

    sharedSet = set(sharedWords)
    for word in colDict:
        if word in sharedSet:
            continue
        # The first non-shared word decides: a purely ASCII alphanumeric word
        # yields False, anything else yields True.
        return sharedWords, re.match(r"^[A-Za-z0-9]*$", word) is None

    # Every word of colDict is shared with the base column. The function used
    # to fall through here and return None, which breaks callers that unpack
    # two values. False is the conservative result.
    # NOTE(review): confirm whether a fully untranslated cell should be
    # flagged instead.
    return sharedWords, False
|
|
||
| def createOutputCell(cell, wsOut): | ||
| ''' | ||
| Make a copy of a Cell object into the exact same coordinates in the target Worksheet. | ||
|
|
@@ -268,6 +324,7 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig | |
| if baseColumnIdx is None: | ||
| baseColumnIdx = sorted(columnDictKeyList)[0] | ||
| baseOutputValueList, error_messages = convertCellToOutputValueList(row[baseColumnIdx]) | ||
| baseValueDict = convertCellToDict(row[baseColumnIdx]) | ||
| messages.extend(error_messages) | ||
| if ignoreOrder: | ||
| baseOutputValueList = sorted(baseOutputValueList) | ||
|
|
@@ -285,6 +342,15 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig | |
| if ignoreOrder: | ||
| curOutputValueList = sorted(curOutputValueList) | ||
| curFormatDict = {} | ||
| sharedWords =[] | ||
| if (colIdx != baseColumnIdx): | ||
| curValueDict = convertCellToDict(row[colIdx]) | ||
| sharedWords, bool_translation = linguisticCharChecker(baseValueDict, curValueDict) | ||
| if wsOut: | ||
| cellOut = getOutputCell(row[colIdx], wsOut) | ||
| if len(sharedWords) > 0 and bool_translation: | ||
| curMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME | ||
| cellOut.style = curMismatchFillStyle | ||
|
|
||
| # Initialize block_tags_fixed_flag to False, if any fix is applied, set to True | ||
| block_tags_fixed_flag = False | ||
|
|
@@ -431,6 +497,11 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig | |
| (row[colIdx].parent.title, row[colIdx].coordinate, str(e))) | ||
|
|
||
| mismatchCell = wsOut.cell(row=getOutputCell(row[0], wsOut).row, column=1).offset(column=mismatchFlagIdx) | ||
| if len(sharedWords) > 0: | ||
| if bool_translation: | ||
| curMismatchFillStyle = LANG_MISMATCH_FILL_STYLE_NAME | ||
| mismatchCell.value = "Y" | ||
| mismatchCell.style = curMismatchFillStyle | ||
| if len(mismatchDict) > 0: | ||
| curMismatchFillStyle = LESSER_MISMATCH_FILL_STYLE_NAME | ||
| for key in mismatchDict: | ||
|
|
@@ -439,7 +510,8 @@ def checkRowForMismatch(row, columnDict, fixedColumnDict, baseColumnIdx=None, ig | |
| mismatchCell.value = "Y" | ||
| mismatchCell.style = curMismatchFillStyle | ||
| else: | ||
| mismatchCell.value = "N" | ||
| if mismatchCell.value != "Y": | ||
| mismatchCell.value = "N" | ||
|
|
||
| return baseColumnDict, mismatchDict | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If a word occurs more than once, do we increment the occurrence count, or do we ignore the repeat?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We take only unique words in the dictionary. The keys are unique and value will always be 1. We do not increment count if the word occurs again.