-
Notifications
You must be signed in to change notification settings - Fork 0
code review #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
code review #3
Changes from all commits
12bed24
10f3f87
566b29a
fd7e524
72d75b2
20b096e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,129 @@ | ||
| import re | ||
| import string | ||
|
|
||
# French logical connectors grouped by rhetorical function.  An '@' inside an
# entry marks a two-part ("correlative") connector whose halves occur at
# different places in the sentence, e.g. 'non seulement ... mais encore'.

# Addition / continuation.
addition = ['et', 'de plus', 'puis', 'en outre', 'non seulement@mais encore']

# Alternative / choice.
alternative = ['ou', 'soit@soit', 'soit@ou', 'tantôt@tantôt', 'ou@ou', 'ou bien', 'seulement@mais encore',
               "l'un@l'autre", "d'un côté@de l'autre"]

# Goal / purpose.
but = ['afin que', 'pour que', 'de peur que', 'en vue de', 'de façon à ce que']

# Cause.
cause = ['car', 'en effet', 'effectivement', 'comme', 'par', 'parce que', 'puisque', 'attendu que', 'vu que',
         'étant donné que', 'grâce à', 'par suite de', 'eu égard à', 'en raison de', 'du fait que', 'dans la mesure où',
         'sous prétexte que']

# Comparison.
comparaison = ['comme', 'de même que', 'ainsi que', 'autant que', 'aussi@que', 'si@que', 'de la même façon que',
               'semblablement', 'pareillement', 'plus que', 'moins que', 'non moins que', 'selon que', 'suivant que',
               'comme si']

# Concession.
concession = ['malgré', 'en dépit de', 'quoique', 'bien que', 'alors que', 'quelque soit', 'même si',
              "ce n'est pas que", 'certes', 'bien sûr', 'évidemment', 'il est vrai que', 'toutefois']

# Conclusion.
conclusion = ['en conclusion', 'pour conclure', 'en guise de conclusion', 'en somme', 'bref', 'ainsi', 'donc',
              'en résumé', 'en un mot', 'par conséquent', 'finalement', 'enfin', 'en définitive']

# Condition / hypothesis.
condition = ['si', 'au cas où', 'à condition que', 'pourvu que', 'à moins que', 'en admettant que', 'pour peu que',
             'à supposer que', 'en supposant que', "dans l'hypothèse où", 'dans le cas où', 'probablement',
             'sans doute', 'apparemment']

# Consequence.
consequence = ['donc', 'aussi', 'partant', 'alors', 'ainsi', 'par conséquent', 'si bien que', "d'où",
               'en conséquence', 'conséquemment', 'par suite', "c'est pourquoi", 'de sorte que', 'en sorte que',
               'de façon que', 'de manière que', 'si bien que', 'tant et']

# Enumeration / ordering.
classification = ["d'abord", "tout d'abord", 'en premier lieu', 'premièrement', 'en deuxième lieu', 'deuxièmement',
                  'après', 'ensuite', 'de plus', 'quant à', 'en troisième lieu', 'puis', 'en dernier lieu',
                  'pour conclure', 'enfin']

# Explanation / restatement.
explication = ['savoir', 'à savoir', "c'est-à-dire", 'soit']

# Linking / transition.
liaison = ['alors', 'ainsi', 'aussi', "d'ailleurs", 'en fait', 'en effet', 'de surcroît', 'de même', 'également',
           'puis', 'ensuite']

# Opposition.
opposition = ['mais', 'cependant', 'or', 'en revanche', 'alors que', 'pourtant', 'par contre', 'tandis que',
              'néanmoins', 'au contraire', 'pour sa part', "d'un autre côté", 'en dépit de', 'malgré', 'au lieu de']

# Restriction.
restriction = ['cependant', 'toutefois', 'néanmoins', 'pourtant', 'mis à part', 'ne@que', 'en dehors de', 'hormis',
               'à défaut de', 'excepté', 'sauf', 'uniquement', 'simplement']

# Time.
temps = ['quand', 'lorsque', 'comme', 'avant que', 'après que', 'alors que', 'dès lors que', 'tandis que',
         'depuis que', 'en même temps que', 'pendant que', 'au moment où']

# Parallel lists: connecteurs[i] is the word list for the category named
# connecteurs_names[i].  The two must stay in the same order.
connecteurs = [addition, alternative, but, cause, comparaison, concession, consequence, conclusion, condition,
               classification,
               explication, liaison, opposition, restriction, temps]
connecteurs_names = ['addition', 'alternative', 'but', 'cause', 'comparaison', 'concession', 'consequence',
                     'conclusion', 'condition', 'classification', 'explication', 'liaison', 'opposition',
                     'restriction', 'temps']

# Maps the first half of each correlative ('@') connector to its second half,
# used by detection() to confirm both halves are present.
doubles_starters = {'non seulement': 'mais encore', 'soit': 'soit', 'tantôt': 'tantôt', 'ou': 'ou',
                    'seulement': 'mais encore', "l'un": "l'autre", "d'un côté": "de l'autre", 'aussi': 'que',
                    'si': 'que',
                    'ne': 'que'}

# Sample sentence used by the demo call at the bottom of the file.
phrase = "La prolifération de la culture sur brûlis a largement dégradé la forêt ivoirienne alors que le Gabon a plus à craindre de l'ouverture de son couvert forestier à l'exploitation industrielle du bois."
|
|
||
|
|
||
def sentence_to_list(sentence):
    """Return ``[lowercased sentence, lowercased words]``.

    Each word is stripped of leading/trailing punctuation.  The previous
    version interpolated ``string.punctuation`` unescaped into a regex
    character class, which only worked by accident (``\\]`` and the ``,-.``
    range happened to be benign); ``str.strip`` performs the same
    edge-stripping safely.
    """
    words = [w.strip(string.punctuation).lower() for w in sentence.split()]
    return [sentence.lower(), words]
|
|
||
|
|
||
| sentence = sentence_to_list(phrase)[0] | ||
|
|
||
|
|
||
def test_connecteur(word):
    """Return the names of every connector category containing *word*.

    Categories are reported in the order they appear in ``connecteurs``.
    """
    return [name for category, name in zip(connecteurs, connecteurs_names)
            if word in category]
|
|
||
|
|
||
# Build the lookup table: connector -> list of category names it belongs to.
# A connector may appear in several categories (e.g. 'comme', 'alors que').
dictionary_connecteurs = {}

for category_words, category_name in zip(connecteurs, connecteurs_names):
    for connecteur in category_words:
        dictionary_connecteurs.setdefault(connecteur, []).append(category_name)
|
|
||
|
|
||
def detection(str):
    # NOTE(review): the parameter shadows the builtin `str`; kept unchanged for
    # interface compatibility with existing callers.
    """Return ``{connector: [category names]}`` for connectors found in *str*.

    *str* is expected to be lowercased (see ``sentence_to_list``).  Single-word
    connectors must match a standalone word; multi-word connectors match as
    substrings.  Correlative connectors ('a@b') require both halves.
    """
    # Word list computed once, instead of once per membership test as before.
    words = sentence_to_list(str)[1]

    found_connectors = {}
    for connecteur in dictionary_connecteurs:
        # Cheap substring test first; then require either a standalone word or
        # a multi-word connector, to avoid matching inside longer words.
        if connecteur in str and (connecteur in words or ' ' in connecteur):
            found_connectors[connecteur] = dictionary_connecteurs[connecteur]

    for starter, closer in doubles_starters.items():
        # When both halves are the same word ('soit@soit'), the second half
        # must appear after removing the first occurrence.
        remainder = str.replace(starter, '', 1) if starter == closer else str
        if starter in str and closer in remainder:
            full_connector = starter + '@' + closer
            found_connectors[full_connector] = dictionary_connecteurs[full_connector]

    # 'ou', 'si' and 'ne' are substrings of many French words ('pour', 'si'
    # inside 'ainsi', ...): keep their correlative pairs only when the starter
    # occurs as a standalone word.
    for pair, starter in (("ou@ou", 'ou'), ("si@que", 'si'), ("ne@que", 'ne')):
        if starter not in words and pair in found_connectors:
            del found_connectors[pair]

    return found_connectors
|
|
||
|
|
||
# Demo: print every connector detected in the sample sentence.
print(detection(sentence))
# print(dictionary_connecteurs)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,27 +1,101 @@ | ||
| import numpy as np | ||
| import matplotlib.pyplot as plt | ||
| from nltk.corpus import brown | ||
| from linalgo.client import LinalgoClient | ||
|
|
||
import os

# NOTE(review): the previous revision committed a live API token; committing
# credentials to version control is a security hazard (the old token should be
# revoked).  Read it from the environment instead:
#   export LINALGO_TOKEN=...
token = os.environ.get('LINALGO_TOKEN', '')

api_url = 'https://api.linalgo.com/hub'
linalgo_client = LinalgoClient(token=token, api_url=api_url)
task = linalgo_client.get_task('4a2c20e3-64af-4a9f-9fc8-5e703dd7a835')
|
|
||
| if __name__ == '__main__': | ||
|
|
||
| n = 1000 | ||
| theta = np.array([0, 0]) | ||
| x = np.random.uniform(0, 1, (n, 2)) | ||
| # n = 1000 | ||
| theta = np.array([1, -0.5]) | ||
|
|
||
|
|
||
| # x = np.random.uniform(0, 1, (n, 2)) | ||
| # x[:, 1] = 1 | ||
| # y = x.dot(theta) + np.random.normal(0, 0.2, n) | ||
|
|
||
# To create a dictionary with the words as keys and the frequencies as definitions
def create_dico(wordlist=None):
    """Return ``{word: frequency in percent}`` over *wordlist*.

    Words are lowercased before counting.  *wordlist* defaults to the Brown
    corpus (the original hard-coded behaviour); passing an explicit sequence
    of words makes the function reusable and testable without nltk.
    """
    if wordlist is None:
        wordlist = brown.words()
        # wordlist = opinion_lexicon.words()
    counts = {}
    for word in wordlist:
        word = word.lower()
        if word in counts:
            counts[word] += 1
        else:
            counts[word] = 1
    # NOTE(review): assumes wordlist supports len(); brown.words() does.
    total = len(wordlist)
    frequencies = {}
    for word in counts:
        frequencies[word] = counts[word] * 100 / total
    return frequencies
|
|
||
|
|
||
# To gather documents and annotations from LinHub
dataset = []
n = len(task.documents)

# NOTE(review): the loop variable indexes documents, not words — renamed from
# the misleading `word`.
for doc_idx in range(n):
    annotations_list = task.documents[doc_idx].annotations
    if len(annotations_list) != 0:
        x = task.documents[doc_idx].content
        # NOTE(review): only the first annotation is used — confirm a document
        # can never carry several (possibly conflicting) annotations.
        y_raw = annotations_list[0]
        y = task.get_name(str(y_raw))
        dataset.append((x, y))
# print(dataset)
|
|
||
# Replace each x by its corpus frequency and each y by +1 ("known") / -1.
x_formatted = []
y_formatted = []
brown_dict = create_dico()

IK = 'I know this word'
IDK = "I don't know this word"

for couple in dataset:
    x, y = couple[0], couple[1]
    # Words absent from the reference corpus are silently skipped.
    if x in brown_dict:
        x_formatted.append(brown_dict[x])
        y_formatted.append(1 if y == IK else -1)

n_effectif = len(x_formatted)
print(n_effectif)
x = np.zeros((n_effectif, 2))
print(x_formatted)
x[:, 0] = x_formatted
x[:, 1] = 1  # bias column
# NOTE(review): removed the dead assignment `y = x.dot(theta) + noise` that was
# immediately overwritten below — and whose noise vector had length n (number
# of documents), not n_effectif, so it would have crashed if ever reached.
y = y_formatted
|
|
||
|
|
||
def f(x, theta):
    """Linear model: prediction is the dot product of features and parameters."""
    return x.dot(theta)
|
|
||
|
|
||
# computes the MRS of y and x
def least_square_error(theta):
    """Sum of squared residuals of the module-level (x, y) under *theta*."""
    # NOTE(review): an unreachable second `return ... / n` (normalised variant)
    # followed this line — apparently a merge/diff artifact; removed.
    return sum((y - x.dot(theta))**2)
|
|
||
|
|
||
# computes the derivative of the MRS
def least_square_error_derivative(theta):
    """Gradient of least_square_error with respect to *theta*."""
    # NOTE(review): an unreachable normalised variant (`... / n`) followed this
    # line — apparently a merge/diff artifact; removed.
    return -2 * sum((y - f(x, theta))[:, np.newaxis] * x)
|
|
||
| def gradient_descent(theta0, n_iterations=100, step_size=0.01, | ||
| precision=0.1, verbose=False): | ||
|
|
||
| def gradient_descent(theta0, n_iterations=100, step_size=0.000001, | ||
| precision=0, verbose=False): | ||
| current_theta = theta0 | ||
| for i in range(n_iterations): | ||
| derivative = least_square_error_derivative(current_theta) | ||
|
|
@@ -33,11 +107,12 @@ def gradient_descent(theta0, n_iterations=100, step_size=0.01, | |
| break | ||
| current_theta = next_theta | ||
| return current_theta | ||
|
|
||
# NOTE(review): the previous revision ran gradient_descent twice back to back
# (the first result was discarded) — a diff/merge artifact; only the final
# call is kept.
theta_estimate = gradient_descent(np.array([0.5, 1]), verbose=True)
# print(f'{theta} VS {theta_estimate}')
print(theta_estimate)
|
|
||
| # n = 1000 | ||
| # alpha = 3 | ||
| # mu = 0 | ||
|
|
@@ -51,7 +126,6 @@ def gradient_descent(theta0, n_iterations=100, step_size=0.01, | |
| # if abs(step) <= precision: | ||
| # break | ||
|
|
||
|
|
||
| # def depsilon(alpha,beta,data_base,data_size): | ||
| # sum = 0 | ||
| # for j in range(data_size) : | ||
|
|
@@ -60,16 +134,13 @@ def gradient_descent(theta0, n_iterations=100, step_size=0.01, | |
| # sum = 2 * sum | ||
| # return sum / data_size | ||
|
|
||
|
|
||
|
|
||
| # def err_fun(alpha, beta, data_base, data_size): | ||
| # sum = 0 | ||
| # for i in range(data_size): | ||
| # iteration = (data_base[i][1]-alpha*data_base[i][0]-beta)**2 | ||
| # sum += iteration | ||
| # return sum | ||
|
|
||
|
|
||
| # error = np.random.normal(mu,sigma) | ||
| # y = alpha * x + beta + error | ||
| # # y_float = alpha * x_values + beta + error | ||
|
|
@@ -87,15 +158,13 @@ def gradient_descent(theta0, n_iterations=100, step_size=0.01, | |
| # precision = 0.000001 # Desired precision of result | ||
| # max_iters = 10000 # Maximum number of iterations | ||
|
|
||
|
|
||
| # def err_fun(alpha, beta, data_base, data_size): | ||
| # sum = 0 | ||
| # for i in range(data_size): | ||
| # iteration = (data_base[i][1]-alpha*data_base[i][0]-beta)**2 | ||
| # sum += iteration | ||
| # return sum | ||
|
|
||
|
|
||
| # # Derivative function | ||
| # def depsilon(alpha,beta,data_base,data_size): | ||
| # sum = 0 | ||
|
|
@@ -105,7 +174,6 @@ def gradient_descent(theta0, n_iterations=100, step_size=0.01, | |
| # sum = 2 * sum | ||
| # return sum / data_size | ||
|
|
||
|
|
||
| # for _ in range(max_iters): | ||
| # current_alpha = next_alpha | ||
| # next_alpha = current_alpha - gamma * depsilon(current_alpha, beta, data_base, n) | ||
|
|
@@ -119,7 +187,6 @@ def gradient_descent(theta0, n_iterations=100, step_size=0.01, | |
| # for k in alpha_list: | ||
| # error_list.append(err_fun(k, beta, data_base, n)) | ||
|
|
||
|
|
||
| # print("Minimum at ", next_alpha) | ||
|
|
||
| # plt.plot(alpha_list,error_list) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Committing a valid authentication token to GitHub is dangerous and is considered a security hazard. You could read it from an environment variable here instead.