-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest1.py
More file actions
85 lines (59 loc) · 3.14 KB
/
test1.py
File metadata and controls
85 lines (59 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from flask import Flask, request, render_template
import pandas as pd
import re
app = Flask(__name__)

# Load the dataset once at startup (fail fast with a clear message if it is
# missing, unparsable, or empty — the search route cannot work without it).
try:
    df = pd.read_csv('dataset.csv')
    if df.empty:
        raise ValueError("Dataset is empty. Please check your CSV file.")
except FileNotFoundError:
    print("Error: 'dataset.csv' not found.")
    # exit() is a site-module convenience for interactive sessions and is not
    # guaranteed in scripts/frozen apps; SystemExit is always available.
    raise SystemExit(1)
except pd.errors.ParserError:
    print("Error: Could not parse 'dataset.csv'. Check the file format.")
    raise SystemExit(1)
except Exception as e:  # last-resort guard so startup failures are explained
    # NOTE: the ValueError raised above for an empty dataset lands here too,
    # matching the original flow (print the message, then exit non-zero).
    print(f"An unexpected error occurred while loading the dataset: {e}")
    raise SystemExit(1)
# Preprocessing function (module level so the cleaning rules are defined once
# and can be reused both at startup and per search keyword).
def preprocess_text(text):
    """Normalize a value for keyword matching.

    Returns "" for missing values (NaN/None); otherwise strips every
    character that is not alphanumeric or whitespace and lowercases the
    result. Non-string values (e.g. numeric titles in the CSV) are
    converted with str() first so re.sub does not raise TypeError.
    """
    if pd.isna(text):
        return ""
    text = re.sub(r"[^a-zA-Z0-9\s]", "", str(text))  # drop punctuation/symbols
    return text.lower()
# Pre-clean the searchable text columns once at startup so per-request
# matching can reuse the normalized values instead of re-cleaning each row.
searchable_columns = [c for c in ('Title', 'Abstract') if c in df.columns]
for column in searchable_columns:
    df['processed_' + column] = df[column].apply(preprocess_text)
@app.route('/')
def home():
    """Render the search landing page."""
    landing_page = 'newindex.html'
    return render_template(landing_page)
@app.route('/search', methods=['POST'])
def search():
    """Handle a keyword search over the dataset.

    Reads the 'keywords' form field, which may combine terms with AND/OR
    (any case: the query is uppercased before splitting on the operators).
    Every AND-part must match; within a part, any OR keyword found in the
    preprocessed Title or Abstract counts as a hit. Renders newresults.html
    with the matching rows, or a message when the query is empty or yields
    no results.
    """
    query = request.form.get("keywords")
    if not query:  # guard: missing or empty query
        message = "Please enter search keywords."
        return render_template('newresults.html', tables=None, message=message, keywords=None)

    # Uppercase so 'and'/'or' typed in any case act as boolean operators.
    query = query.upper()
    and_parts = [part.strip() for part in query.split(" AND ")]

    filtered_df = df.copy()  # progressively narrowed by each AND part
    for part in and_parts:
        # Normalize keywords the same way the columns were preprocessed;
        # skip empty terms explicitly — the original str.contains("") would
        # silently match every row, which is the same "no constraint" result.
        terms = [preprocess_text(kw.strip()) for kw in part.split(" OR ")]
        terms = [t for t in terms if t]
        if not terms:
            continue  # an empty part imposes no constraint

        combined_mask = pd.Series(False, index=filtered_df.index)
        for term in terms:
            # A hit in either Title or Abstract satisfies this keyword;
            # missing processed columns contribute an all-False mask.
            title_mask = filtered_df['processed_Title'].str.contains(term, case=False, na=False, regex=False) if 'processed_Title' in filtered_df else pd.Series(False, index=filtered_df.index)
            abstract_mask = filtered_df['processed_Abstract'].str.contains(term, case=False, na=False, regex=False) if 'processed_Abstract' in filtered_df else pd.Series(False, index=filtered_df.index)
            combined_mask = combined_mask | title_mask | abstract_mask

        filtered_df = filtered_df[combined_mask]

    if filtered_df.empty:
        message = "No results found for the given keywords."
        return render_template('newresults.html', tables=None, message=message, keywords=query)

    # Hide the internal 'processed_*' helper columns from the rendered table —
    # the original leaked them into the user-facing HTML.
    display_df = filtered_df.drop(columns=[c for c in filtered_df.columns if c.startswith('processed_')])
    return render_template('newresults.html', tables=[display_df.to_html(classes='data')], keywords=query, num_results=len(filtered_df))
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive Werkzeug debugger and
    # auto-reload — fine for local development, never for production.
    app.run(debug=True)