-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnominalization.py
More file actions
42 lines (37 loc) · 1.55 KB
/
nominalization.py
File metadata and controls
42 lines (37 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import spacy
import en_core_web_sm
nlp = en_core_web_sm.load()
def textInAList(text):
    '''
    Splits text into sentences on "." and returns them as a list.

    Each sentence has the spaces on both of its ends removed; the periods
    themselves are not kept. The split is purely on the "." character, so
    any period inside the text (not only sentence-final ones) starts a new
    list entry.

    text: the string to split.
    Returns: a list of strings, one per sentence. An empty string gives [].
    '''
    fragments = text.split(".")
    # Text that ends with a period leaves an empty (or blank) piece after the
    # final "."; drop only that piece. A non-blank tail is a last sentence
    # written without its closing period and has to be kept.
    if not fragments[-1].strip():
        fragments = fragments[:-1]
    return [fragment.strip(" ") for fragment in fragments]
def nominalizationIdentification(inputText):
    '''
    Marks sentence subjects that look like nominalizations.

    The text is split into sentences with textInAList, each sentence is
    parsed with the module-level spaCy pipeline `nlp`, and every token whose
    dependency label is 'nsubj' or 'nsubjpass' and whose text ends with one
    of the suffixes below gets "[Nominalization]" appended to it.

    inputText: the text to analyse.
    Returns: one string holding every token of every sentence separated by
    single spaces, with a "." appended to the last token of each sentence.
    '''
    # A tuple, because str.endswith accepts a tuple of candidate suffixes and
    # checks each at its own length (a fixed-width slice cannot match the
    # six-letter 'nesses').
    suffixes = ('ness', 'sis', 'tion', 'sion', 'cion', 'ing', 'ment',
                'nesses', 'sises', 'tions', 'cions', 'ings')
    subjectLabels = ('nsubj', 'nsubjpass')

    outputWords = []
    for sentence in textInAList(inputText):
        nlpSentence = nlp(sentence)
        lastIndex = len(nlpSentence) - 1
        for i, word in enumerate(nlpSentence):
            annotated = word.text
            if word.dep_ in subjectLabels and annotated.endswith(suffixes):
                annotated += "[Nominalization]"
            # Checked independently of the subject test so that a sentence
            # whose final token is its subject still gets its period back.
            if i == lastIndex:
                annotated += "."
            outputWords.append(annotated)

    # Join once, after every sentence has been processed, so each sentence
    # appears in the result exactly one time.
    return " ".join(outputWords)