-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutils.py
More file actions
128 lines (100 loc) · 3.75 KB
/
utils.py
File metadata and controls
128 lines (100 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from typing import Dict, Any
import streamlit as st
from streamlit.runtime.uploaded_file_manager import UploadedFile
from typing import Dict, Union
import os
import time
from config import *
def validate_file(uploaded_file: UploadedFile) -> Dict[str, Union[bool, str, float]]:
    """Check an uploaded file against the allowed extensions and size limit.

    Args:
        uploaded_file: The upload to inspect. Any falsy value is reported
            as "No file uploaded".

    Returns:
        ``{"valid": True, "size_mb": <size rounded to 2 decimals>}`` when
        both checks pass, otherwise ``{"valid": False, "error": <message>}``.
    """
    if not uploaded_file:
        return {"valid": False, "error": "No file uploaded"}

    # The extension (leading dot included) is matched case-insensitively.
    _, suffix = os.path.splitext(uploaded_file.name)
    if suffix.lower() not in SUPPORTED_FORMATS:
        allowed = ', '.join(SUPPORTED_FORMATS)
        return {
            "valid": False,
            "error": f"Unsupported format. Please upload: {allowed}",
        }

    # Scale the reported size down by 1024 * 1024 before comparing it with
    # the configured megabyte limit.
    size_in_mb = uploaded_file.size / (1024 * 1024)
    if size_in_mb > MAX_FILE_SIZE_MB:
        return {
            "valid": False,
            "error": f"File too large. Maximum size: {MAX_FILE_SIZE_MB}MB",
        }

    return {"valid": True, "size_mb": round(size_in_mb, 2)}
def display_text_stats(stats: Dict[str, int]):
    """Render text statistics as a row of four Streamlit metrics.

    Args:
        stats: Mapping read for the keys ``characters``, ``words``,
            ``sentences`` and ``pages_estimated``; a missing key is shown
            as 0. Nothing is rendered when the mapping is empty or falsy.
    """
    if not stats:
        return

    chars_col, words_col, sentences_col, pages_col = st.columns(4)

    # The first three metrics share the same thousands-separated formatting.
    formatted_metrics = (
        (chars_col, "Characters", 'characters'),
        (words_col, "Words", 'words'),
        (sentences_col, "Sentences", 'sentences'),
    )
    for column, label, key in formatted_metrics:
        with column:
            st.metric(label, f"{stats.get(key, 0):,}")

    # The page estimate is passed through as-is, without string formatting.
    with pages_col:
        st.metric("Est. Pages", stats.get('pages_estimated', 0))
def display_summary_results(result: Dict[str, Any]):
    """Show a summarization result in the Streamlit UI.

    When ``result['success']`` is missing or falsy, only an error message is
    shown (``result['error']``, or "Unknown error" if absent). Otherwise the
    function renders four metrics, an optional timing note, the summary text
    and a button that downloads the summary as ``pdf_summary.txt``.

    Args:
        result: Mapping that, on success, is indexed for ``original_length``,
            ``summary_length``, ``compression_ratio``, ``chunks_processed``
            and ``summary``; ``total_time`` is optional.

    Raises:
        KeyError: If a successful result lacks one of the required keys.
    """
    if not result.get('success', False):
        failure_message = f"{result.get('error', 'Unknown error')}"
        st.error(failure_message)
        return

    # Headline metrics. Values are looked up lazily, one column at a time.
    original_col, summary_col, ratio_col, chunks_col = st.columns(4)
    formatted_metrics = (
        (original_col, "Original Words", 'original_length', "{:,}"),
        (summary_col, "Summary Words", 'summary_length', "{:,}"),
        (ratio_col, "Compression Ratio", 'compression_ratio', "{}%"),
    )
    for column, label, key, template in formatted_metrics:
        with column:
            st.metric(label, template.format(result[key]))
    with chunks_col:
        st.metric("Chunks Processed", result['chunks_processed'])

    # Timing is only reported when the caller supplied it.
    if 'total_time' in result:
        elapsed = result['total_time']
        st.info(f"**Processing completed in {elapsed:.2f} seconds**")

    # Summary body followed by the plain-text download.
    st.subheader("Summary")
    summary_text = result['summary']
    st.write(summary_text)
    st.download_button(
        label="📥 Download Summary",
        data=summary_text,
        file_name="pdf_summary.txt",
        mime="text/plain",
    )
def create_summary_report(original_text: str, result: Dict[str, Any],
                          filename: str) -> str:
    """Build a Markdown report describing a summarization result.

    Args:
        original_text: Accepted for interface compatibility; the body does
            not read it.
        result: Mapping indexed for ``original_length``, ``summary_length``,
            ``compression_ratio``, ``chunks_processed`` and ``summary`` when
            ``result['success']`` is truthy.
        filename: Name shown as the original file in the report.

    Returns:
        The report text, stamped with the current local time, or an empty
        string when ``result['success']`` is missing or falsy.

    Raises:
        KeyError: If a successful result lacks one of the required keys.
    """
    if not result.get('success'):
        return ""

    generated_at = time.strftime('%Y-%m-%d %H:%M:%S')
    return f"""
# PDF Summary Report
## Document Information
- **Original File:** {filename}
- **Processing Date:** {generated_at}
- **Original Text Length:** {result['original_length']} words
- **Summary Length:** {result['summary_length']} words
- **Compression Ratio:** {result['compression_ratio']}%
- **Chunks Processed:** {result['chunks_processed']}
## Summary
{result['summary']}
---
*Generated by PDF Summarizer using distilbart-cnn-12-6*
"""
def display_model_info():
    """Render an "About the AI Model" expander with static model details."""
    model_details = """
**Model:** `sshleifer/distilbart-cnn-12-6`
- **Type:** Distilled BART model for summarization
- **Optimized for:** News articles and general text summarization
- **Max Input Length:** ~1024 tokens per chunk
- **Advantages:** Faster inference, good quality summaries
- **Processing:** Text is automatically chunked for longer documents
"""
    with st.expander("About the AI Model"):
        st.markdown(model_details)
def setup_page_config():
    """Apply the Streamlit page settings for the app.

    Title, icon and layout are read from the module-level names
    ``PAGE_TITLE``, ``PAGE_ICON`` and ``LAYOUT`` (presumably provided by the
    ``config`` star import - confirm there); the sidebar starts collapsed.
    """
    page_settings = {
        "page_title": PAGE_TITLE,
        "page_icon": PAGE_ICON,
        "layout": LAYOUT,
        "initial_sidebar_state": "collapsed",
    }
    st.set_page_config(**page_settings)