awacke1 committed (verified)
Commit c19da60 · 1 Parent(s): ca88193

Create backup23.app.py

Files changed (1)
  1. backup23.app.py +1266 -0
backup23.app.py ADDED
@@ -0,0 +1,1266 @@
import streamlit as st
import anthropic
import openai
import base64
import cv2
import glob
import json
import math
import os
import pytz
import random
import re
import requests
import textract
import time
import zipfile
import plotly.graph_objects as go
import streamlit.components.v1 as components
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import defaultdict, deque, Counter
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from io import BytesIO
from PIL import Image
from PyPDF2 import PdfReader
from urllib.parse import quote
from xml.etree import ElementTree as ET
from openai import OpenAI
import extra_streamlit_components as stx
from streamlit.runtime.scriptrunner import get_script_run_ctx
import asyncio
import edge_tts
from streamlit_marquee import streamlit_marquee
from typing import Tuple, Optional
import pandas as pd

# ─────────────────────────────────────────────────────────
# 1. CORE CONFIGURATION & SETUP
# ─────────────────────────────────────────────────────────

st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲TalkingAIResearcher🏆"
    }
)
load_dotenv()

# ▶ Available English voices for Edge TTS
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural",
    "en-GB-RyanNeural",
    "en-AU-NatashaNeural",
    "en-AU-WilliamNeural",
    "en-CA-ClaraNeural",
    "en-CA-LiamNeural"
]

# ▶ Initialize Session State
if 'marquee_settings' not in st.session_state:
    st.session_state['marquee_settings'] = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    }
if 'tts_voice' not in st.session_state:
    st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
if 'audio_format' not in st.session_state:
    st.session_state['audio_format'] = 'mp3'
if 'transcript_history' not in st.session_state:
    st.session_state['transcript_history'] = []
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []
if 'openai_model' not in st.session_state:
    st.session_state['openai_model'] = "gpt-4o-2024-05-13"
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
if 'last_voice_input' not in st.session_state:
    st.session_state['last_voice_input'] = ""
if 'editing_file' not in st.session_state:
    st.session_state['editing_file'] = None
if 'edit_new_name' not in st.session_state:
    st.session_state['edit_new_name'] = ""
if 'edit_new_content' not in st.session_state:
    st.session_state['edit_new_content'] = ""
if 'viewing_prefix' not in st.session_state:
    st.session_state['viewing_prefix'] = None
if 'should_rerun' not in st.session_state:
    st.session_state['should_rerun'] = False
if 'old_val' not in st.session_state:
    st.session_state['old_val'] = None
if 'last_query' not in st.session_state:
    st.session_state['last_query'] = ""
if 'marquee_content' not in st.session_state:
    st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant"

# ▶ Additional keys for performance, caching, etc.
if 'audio_cache' not in st.session_state:
    st.session_state['audio_cache'] = {}
if 'download_link_cache' not in st.session_state:
    st.session_state['download_link_cache'] = {}
if 'operation_timings' not in st.session_state:
    st.session_state['operation_timings'] = {}
if 'performance_metrics' not in st.session_state:
    st.session_state['performance_metrics'] = defaultdict(list)
if 'enable_audio' not in st.session_state:
    st.session_state['enable_audio'] = True  # Turn TTS on/off

# ▶ API Keys
openai_api_key = os.getenv('OPENAI_API_KEY', "")
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
xai_key = os.getenv('xai', "")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

openai.api_key = openai_api_key
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')

# ▶ Helper constants
FILE_EMOJIS = {
    "md": "📝",
    "mp3": "🎵",
    "wav": "🔊"
}

# ─────────────────────────────────────────────────────────
# 2. PERFORMANCE MONITORING & TIMING
# ─────────────────────────────────────────────────────────

class PerformanceTimer:
    """
    ⏱️ A context manager for timing operations with automatic logging.
    Usage:
        with PerformanceTimer("my_operation"):
            # do something
    The duration is stored into `st.session_state['operation_timings']`
    and appended to the `performance_metrics` list.
    """
    def __init__(self, operation_name: str):
        self.operation_name = operation_name
        self.start_time = None

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if not exc_type:  # Only log if no exception occurred
            duration = time.time() - self.start_time
            st.session_state['operation_timings'][self.operation_name] = duration
            st.session_state['performance_metrics'][self.operation_name].append(duration)

def log_performance_metrics():
    """
    📈 Display performance metrics in the sidebar, including a timing breakdown
    and a small bar chart of average times.
    """
    st.sidebar.markdown("### ⏱️ Performance Metrics")

    metrics = st.session_state['operation_timings']
    if metrics:
        total_time = sum(metrics.values())
        st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")

        # Break down each operation time
        for operation, duration in metrics.items():
            percentage = (duration / total_time) * 100
            st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")

        # Show timing history chart
        history_data = []
        for op, times in st.session_state['performance_metrics'].items():
            if times:  # Only if we have data
                avg_time = sum(times) / len(times)
                history_data.append({"Operation": op, "Avg Time (s)": avg_time})

        if history_data:
            st.sidebar.markdown("### 📊 Timing History (Avg)")
            chart_data = pd.DataFrame(history_data)
            st.sidebar.bar_chart(chart_data.set_index("Operation"))

# ─────────────────────────────────────────────────────────
# 3. HELPER FUNCTIONS (FILENAMES, LINKS, MARQUEE, ETC.)
# ─────────────────────────────────────────────────────────

def get_central_time():
    """🌎 Get current time in US Central timezone."""
    central = pytz.timezone('US/Central')
    return datetime.now(central)

def format_timestamp_prefix():
    """📅 Generate a timestamp prefix"""
    ct = get_central_time()
    #return ct.strftime("%m_%d_%y_%I_%M_%p")
    return ct.strftime("%Y%m%d_%H%M%S")

def initialize_marquee_settings():
    """🌈 Initialize marquee defaults if needed."""
    if 'marquee_settings' not in st.session_state:
        st.session_state['marquee_settings'] = {
            "background": "#1E1E1E",
            "color": "#FFFFFF",
            "font-size": "14px",
            "animationDuration": "20s",
            "width": "100%",
            "lineHeight": "35px"
        }

def get_marquee_settings():
    """🔧 Retrieve marquee settings from session."""
    initialize_marquee_settings()
    return st.session_state['marquee_settings']

def update_marquee_settings_ui():
    """🖌 Add color pickers & sliders for marquee config in the sidebar."""
    st.sidebar.markdown("### 🎯 Marquee Settings")
    cols = st.sidebar.columns(2)
    with cols[0]:
        bg_color = st.color_picker("🎨 Background",
                                   st.session_state['marquee_settings']["background"],
                                   key="bg_color_picker")
        text_color = st.color_picker("✍️ Text",
                                     st.session_state['marquee_settings']["color"],
                                     key="text_color_picker")
    with cols[1]:
        font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
        duration = st.slider("⏱️ Speed (secs)", 1, 20, 20, key="duration_slider")

    st.session_state['marquee_settings'].update({
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s"
    })

def display_marquee(text, settings, key_suffix=""):
    """
    🎉 Show a marquee text with style from the marquee settings.
    Automatically truncates text to ~280 chars to avoid overflow.
    """
    truncated_text = text[:280] + "..." if len(text) > 280 else text
    streamlit_marquee(
        content=truncated_text,
        **settings,
        key=f"marquee_{key_suffix}"
    )
    st.write("")

def get_high_info_terms(text: str, top_n=10) -> list:
    """
    📌 Extract top_n frequent words & bigrams (excluding common stopwords).
    Useful for generating short descriptive keywords from Q/A content.
    """
    stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
    combined = words + bi_grams
    filtered = [term for term in combined if term not in stop_words and len(term.split()) <= 2]
    counter = Counter(filtered)
    return [term for term, freq in counter.most_common(top_n)]
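# Rough illustration (hypothetical input, not from the app's data): for
#   get_high_info_terms("sparse mixture of experts routing for sparse models", top_n=3)
# 'sparse' would rank first (it appears twice); the remaining slots are filled by
# other unigrams/bigrams that each occur once.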

def clean_text_for_filename(text: str) -> str:
    """
    🏷️ Remove special chars & short unhelpful words from text for safer filenames.
    Returns a lowercased, underscore-joined token string.
    """
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    words = text.split()
    stop_short = set(['the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library'])
    filtered = [w for w in words if len(w) > 3 and w not in stop_short]
    return '_'.join(filtered)[:200]

def generate_filename(prompt, response, file_type="md", max_length=200):
    """
    📁 Create a shortened filename based on prompt+response content:
    1) Extract top info terms,
    2) Combine snippet from prompt+response,
    3) Remove duplicates,
    4) Truncate if needed.
    """
    prefix = format_timestamp_prefix() + "_"
    combined_text = (prompt + " " + response)[:200]
    info_terms = get_high_info_terms(combined_text, top_n=5)
    snippet = (prompt[:40] + " " + response[:40]).strip()
    snippet_cleaned = clean_text_for_filename(snippet)

    # Remove duplicates
    name_parts = info_terms + [snippet_cleaned]
    seen = set()
    unique_parts = []
    for part in name_parts:
        if part not in seen:
            seen.add(part)
            unique_parts.append(part)

    full_name = '_'.join(unique_parts).strip('_')
    leftover_chars = max_length - len(prefix) - len(file_type) - 1
    if len(full_name) > leftover_chars:
        full_name = full_name[:leftover_chars]

    return f"{prefix}{full_name}.{file_type}"

def create_file(prompt, response, file_type="md"):
    """
    📝 Create a text file from prompt + response with a sanitized filename.
    Returns the created filename.
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)
    return filename
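# Shape of the generated names (hypothetical values): a Q/A saved around
# 2025-01-01 12:00:00 whose top terms were "mixture experts" and "routing" would
# produce something like "20250101_120000_mixture_experts_routing_<snippet>.md",
# truncated so the whole name stays under max_length characters.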


def get_download_link(file, file_type="zip"):
    """
    Convert a file to base64 and return an HTML link for download.
    """
    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    if file_type == "zip":
        return f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(file)}">📂 Download {os.path.basename(file)}</a>'
    elif file_type == "mp3":
        return f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(file)}">🎵 Download {os.path.basename(file)}</a>'
    elif file_type == "wav":
        return f'<a href="data:audio/wav;base64,{b64}" download="{os.path.basename(file)}">🔊 Download {os.path.basename(file)}</a>'
    elif file_type == "md":
        return f'<a href="data:text/markdown;base64,{b64}" download="{os.path.basename(file)}">📝 Download {os.path.basename(file)}</a>'
    else:
        return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
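# Typical use (hypothetical filename): render the link as raw HTML in Streamlit, e.g.
#   st.markdown(get_download_link("notes.md", file_type="md"), unsafe_allow_html=True)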

def clean_for_speech(text: str) -> str:
    """Clean up text for TTS output."""
    text = text.replace("\n", " ")
    text = text.replace("</s>", " ")
    text = text.replace("#", "")
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Async TTS generation with edge-tts library."""
    text = clean_for_speech(text)
    if not text.strip():
        return None
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
    out_fn = generate_filename(text, text, file_type=file_format)
    await communicate.save(out_fn)
    return out_fn

def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Wrapper for the async TTS generate call."""
    return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))

def play_and_download_audio(file_path, file_type="mp3"):
    """Streamlit audio + a quick download link."""
    if file_path and os.path.exists(file_path):
        st.audio(file_path)
        dl_link = get_download_link(file_path, file_type=file_type)
        st.markdown(dl_link, unsafe_allow_html=True)

def save_qa_with_audio(question, answer, voice=None):
    """Save Q&A to markdown and also generate audio."""
    if not voice:
        voice = st.session_state['tts_voice']

    combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
    md_file = create_file(question, answer, "md")
    audio_text = f"{question}\n\nAnswer: {answer}"
    audio_file = speak_with_edge_tts(
        audio_text,
        voice=voice,
        file_format=st.session_state['audio_format']
    )
    return md_file, audio_file


# ─────────────────────────────────────────────────────────
# 4. OPTIMIZED AUDIO GENERATION (ASYNC TTS + CACHING)
# ─────────────────────────────────────────────────────────

def clean_for_speech(text: str) -> str:
    """
    🔉 Clean up text for TTS output with enhanced cleaning.
    Removes markdown, code blocks, links, etc.
    """
    with PerformanceTimer("text_cleaning"):
        # Remove markdown headers
        text = re.sub(r'#+ ', '', text)
        # Remove link formats [text](url)
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
        # Remove emphasis markers (*, _, ~, `)
        text = re.sub(r'[*_~`]', '', text)
        # Remove code blocks
        text = re.sub(r'```[\s\S]*?```', '', text)
        text = re.sub(r'`[^`]*`', '', text)
        # Remove excess whitespace
        text = re.sub(r'\s+', ' ', text).replace("\n", " ")
        # Remove hidden S tokens
        text = text.replace("</s>", " ")
        # Remove URLs
        text = re.sub(r'https?://\S+', '', text)
        text = re.sub(r'\(https?://[^\)]+\)', '', text)
        text = text.strip()
        return text
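# Roughly, "# Heading with [a link](https://example.org) and `code`" comes out as
# "Heading with a link and code" after this pass (hypothetical input for illustration).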

async def async_edge_tts_generate(
    text: str,
    voice: str,
    rate: int = 0,
    pitch: int = 0,
    file_format: str = "mp3"
) -> Tuple[Optional[str], float]:
    """
    🎶 Asynchronous TTS generation with caching and performance tracking.
    Returns (filename, generation_time).
    """
    with PerformanceTimer("tts_generation") as timer:
        # ▶ Clean & validate text
        text = clean_for_speech(text)
        if not text.strip():
            return None, 0

        # ▶ Check cache (avoid regenerating the same TTS)
        cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
        if cache_key in st.session_state['audio_cache']:
            return st.session_state['audio_cache'][cache_key], 0

        try:
            # ▶ Generate audio
            rate_str = f"{rate:+d}%"
            pitch_str = f"{pitch:+d}Hz"
            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

            # ▶ Generate unique filename
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"

            # ▶ Save audio file
            await communicate.save(filename)

            # ▶ Store in cache
            st.session_state['audio_cache'][cache_key] = filename

            # ▶ Return path + timing
            return filename, time.time() - timer.start_time

        except Exception as e:
            st.error(f"❌ Error generating audio: {str(e)}")
            return None, 0
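# Note on the cache: entries are keyed on the first 100 characters of the cleaned
# text plus voice/rate/pitch/format, and live in st.session_state['audio_cache'],
# so repeat requests within a session reuse the already-saved file.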

async def async_save_qa_with_audio(
    question: str,
    answer: str,
    voice: Optional[str] = None
) -> Tuple[str, Optional[str], float, float]:
    """
    📝 Asynchronously save Q&A to markdown, then generate audio if enabled.
    Returns (md_file, audio_file, md_time, audio_time).
    """
    voice = voice or st.session_state['tts_voice']

    with PerformanceTimer("qa_save") as timer:
        # ▶ Save Q/A as markdown
        md_start = time.time()
        md_file = create_file(question, answer, "md")
        md_time = time.time() - md_start

        # ▶ Generate audio (if globally enabled)
        audio_file = None
        audio_time = 0
        if st.session_state['enable_audio']:
            audio_text = f"{question}\n\nAnswer: {answer}"
            audio_file, audio_time = await async_edge_tts_generate(
                audio_text,
                voice=voice,
                file_format=st.session_state['audio_format']
            )

    return md_file, audio_file, md_time, audio_time
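# Minimal sketch of calling it from an async context (names here are illustrative):
#   md_path, audio_path, md_secs, tts_secs = await async_save_qa_with_audio(q, answer)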

def save_qa_with_audio(question, answer, voice=None):
    """Save Q&A to markdown and also generate audio."""
    if not voice:
        voice = st.session_state['tts_voice']

    combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
    md_file = create_file(question, answer, "md")
    audio_text = f"{question}\n\nAnswer: {answer}"
    audio_file = speak_with_edge_tts(
        audio_text,
        voice=voice,
        file_format=st.session_state['audio_format']
    )
    return md_file, audio_file


def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
    """
    ⬇️ Create a download link for a file with caching & error handling.
    """
    with PerformanceTimer("download_link_generation"):
        cache_key = f"dl_{file_path}"
        if cache_key in st.session_state['download_link_cache']:
            return st.session_state['download_link_cache'][cache_key]

        try:
            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            filename = os.path.basename(file_path)

            if file_type == "mp3":
                link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">🎵 Download {filename}</a>'
            elif file_type == "wav":
                link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">🔊 Download {filename}</a>'
            elif file_type == "md":
                link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">📝 Download {filename}</a>'
            else:
                link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">⬇️ Download {filename}</a>'

            st.session_state['download_link_cache'][cache_key] = link
            return link

        except Exception as e:
            st.error(f"❌ Error creating download link: {str(e)}")
            return ""

# ─────────────────────────────────────────────────────────
# 5. RESEARCH / ARXIV FUNCTIONS
# ─────────────────────────────────────────────────────────

def parse_arxiv_refs(ref_text: str):
    """
    📜 Given a multi-line markdown with Arxiv references,
    parse them into a list of dicts: {date, title, url, authors, summary}.
    """
    if not ref_text:
        return []
    results = []
    current_paper = {}
    lines = ref_text.split('\n')

    for i, line in enumerate(lines):
        if line.count('|') == 2:
            # Found a new paper line
            if current_paper:
                results.append(current_paper)
                if len(results) >= 20:
                    break
            try:
                header_parts = line.strip('* ').split('|')
                date = header_parts[0].strip()
                title = header_parts[1].strip()
                url_match = re.search(r'(https://arxiv.org/\S+)', line)
                url = url_match.group(1) if url_match else f"paper_{len(results)}"

                current_paper = {
                    'date': date,
                    'title': title,
                    'url': url,
                    'authors': '',
                    'summary': '',
                    'full_audio': None,
                    'download_base64': '',
                }
            except Exception as e:
                st.warning(f"⚠️ Error parsing paper header: {str(e)}")
                current_paper = {}
                continue
        elif current_paper:
            # If authors not set, fill it; otherwise, fill summary
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            else:
                if current_paper['summary']:
                    current_paper['summary'] += ' ' + line.strip()
                else:
                    current_paper['summary'] = line.strip()

    if current_paper:
        results.append(current_paper)

    return results[:20]
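# The parser expects reference blocks shaped roughly like (hypothetical example):
#   * 2024-01-15 | Paper Title | https://arxiv.org/abs/0000.00000
#   Author One, Author Two
#   First summary line...
# i.e. a header line containing exactly two '|' separators, followed by an authors
# line and then free-form summary lines until the next header.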

def create_paper_links_md(papers):
    """
    🔗 Create a minimal .md content linking to each paper's Arxiv URL.
    """
    lines = ["# Paper Links\n"]
    for i, p in enumerate(papers, start=1):
        lines.append(f"{i}. **{p['title']}** — [Arxiv]({p['url']})")
    return "\n".join(lines)

async def create_paper_audio_files(papers, input_question):
    """
    🎧 For each paper, generate TTS audio summary and store the path in `paper['full_audio']`.
    Also creates a base64 download link in `paper['download_base64']`.
    """
    for paper in papers:
        try:
            audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            audio_text = clean_for_speech(audio_text)
            file_format = st.session_state['audio_format']
            audio_file, _ = await async_edge_tts_generate(
                audio_text,
                voice=st.session_state['tts_voice'],
                file_format=file_format
            )
            paper['full_audio'] = audio_file

            if audio_file:
                # Convert to base64 link
                ext = file_format
                download_link = create_download_link_with_cache(audio_file, file_type=ext)
                paper['download_base64'] = download_link

        except Exception as e:
            st.warning(f"⚠️ Error processing paper {paper['title']}: {str(e)}")
            paper['full_audio'] = None
            paper['download_base64'] = ''

def display_papers(papers, marquee_settings):
    """
    📑 Display paper info in the main area with marquee + expanders + audio.
    """
    st.write("## 🔎 Research Papers")
    for i, paper in enumerate(papers, start=1):
        marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:120]} | 📝 {paper['summary'][:200]}"
        display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")

        with st.expander(f"{i}. 📄 {paper['title']}", expanded=True):
            st.markdown(f"**{paper['date']} | {paper['title']}** — [Arxiv Link]({paper['url']})")
            st.markdown(f"*Authors:* {paper['authors']}")
            st.markdown(paper['summary'])
            if paper.get('full_audio'):
                st.write("📚 **Paper Audio**")
                st.audio(paper['full_audio'])
                if paper['download_base64']:
                    st.markdown(paper['download_base64'], unsafe_allow_html=True)

def display_papers_in_sidebar(papers):
    """
    🔎 Mirrors the paper listing in the sidebar with expanders, audio, etc.
    """
    st.sidebar.title("🎶 Papers & Audio")
    for i, paper in enumerate(papers, start=1):
        with st.sidebar.expander(f"{i}. {paper['title']}"):
            st.markdown(f"**Arxiv:** [Link]({paper['url']})")
            if paper['full_audio']:
                st.audio(paper['full_audio'])
                if paper['download_base64']:
                    st.markdown(paper['download_base64'], unsafe_allow_html=True)
            st.markdown(f"**Authors:** {paper['authors']}")
            if paper['summary']:
                st.markdown(f"**Summary:** {paper['summary'][:300]}...")
# ─────────────────────────────────────────────────────────
# 6. ZIP FUNCTION
# ─────────────────────────────────────────────────────────

def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
    """
    📦 Zip up all relevant files, generating a short name from high-info terms.
    Returns the zip filename if created, else None.
    """
    md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
    all_files = md_files + mp3_files + wav_files
    if not all_files:
        return None

    all_content = []
    for f in all_files:
        if f.endswith('.md'):
            with open(f, "r", encoding='utf-8') as file:
                all_content.append(file.read())
        elif f.endswith('.mp3') or f.endswith('.wav'):
            basename = os.path.splitext(os.path.basename(f))[0]
            words = basename.replace('_', ' ')
            all_content.append(words)

    all_content.append(input_question)
    combined_content = " ".join(all_content)
    info_terms = get_high_info_terms(combined_content, top_n=10)

    timestamp = format_timestamp_prefix()
    name_text = '-'.join(term for term in info_terms[:5])
    short_zip_name = (timestamp + "_" + name_text)[:20] + ".zip"

    with zipfile.ZipFile(short_zip_name, 'w') as z:
        for f in all_files:
            z.write(f)
    return short_zip_name
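# Resulting archive names look like "<timestamp>_<terms>.zip", truncated to 20
# characters before the extension, e.g. (hypothetical) "20250101_120000_mixt.zip".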

# ─────────────────────────────────────────────────────────
# 7. MAIN AI LOGIC: LOOKUP & TAB HANDLERS
# ─────────────────────────────────────────────────────────


def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                      titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    """Main routine that uses Anthropic (Claude) + Gradio ArXiv RAG pipeline."""
    start = time.time()
    ai_constitution = """
    You are a medical and machine learning review board expert and streamlit python and html5 expert. You are tasked with creating a streamlit app.py and requirements.txt for a solution that answers the questions with a working app to demonstrate. You are to use the paper list below to answer the question thinking through step by step how to create a streamlit app.py and requirements.txt for the solution that answers the questions with a working app to demonstrate.
    """

    # --- 1) Claude API
    client = anthropic.Anthropic(api_key=anthropic_key)
    user_input = q
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[
            {"role": "user", "content": user_input}
        ])
    st.write("Claude's reply 🧠:")
    st.markdown(response.content[0].text)

    # Save & produce audio
    result = response.content[0].text
    create_file(q, result)
    md_file, audio_file = save_qa_with_audio(q, result)
    st.subheader("📝 Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    if useArxiv:
        q = q + result  # Feed Arxiv the question and Claude's answer for prompt fortification to get better answers and references
        # --- 2) Arxiv RAG
        #st.write("Arxiv's AI this Evening is Mixtral 8x7B...")
        st.write('Running Arxiv RAG with Claude inputs.')
        #st.code(q, language="python", line_numbers=True, wrap_lines=True)

        client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = client.predict(
            q,
            10,
            "Semantic Search",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            api_name="/update_with_rag_md"
        )[0]

        #r2 = client.predict(
        #    q,
        #    "mistralai/Mixtral-8x7B-Instruct-v0.1",
        #    True,
        #    api_name="/ask_llm"
        #)
        #result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"

        result = f"🔎 {q}\n\n{refs}"  # use original question q with result paired with paper references for best prompt fortification

        md_file, audio_file = save_qa_with_audio(q, result)
        st.subheader("📝 Main Response Audio")
        play_and_download_audio(audio_file, st.session_state['audio_format'])

        # --- 3) Parse + handle papers
        papers = parse_arxiv_refs(refs)
        if papers:
            # Create minimal links page first
            paper_links = create_paper_links_md(papers)
            links_file = create_file(q, paper_links, "md")
            st.markdown(paper_links)

            # Then create audio for each paper
            if useArxivAudio:
                # create_paper_audio_files is async, so run the coroutine to completion
                asyncio.run(create_paper_audio_files(papers, input_question=q))

            display_papers(papers, get_marquee_settings())  # scrolling marquee per paper and summary

            display_papers_in_sidebar(papers)  # sidebar entry per paper and summary
        else:
            st.warning("No papers found in the response.")

        # --- 4) Claude API with arxiv list of papers to app.py
        client = anthropic.Anthropic(api_key=anthropic_key)
        user_input = q + '\n\n' + 'Use the reference papers below to answer the question by creating a python streamlit app.py and requirements.txt with python libraries for creating a single app.py application that answers the questions with working code to demonstrate.' + '\n\n'
        response = client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[
                {"role": "user", "content": user_input}
            ])
        r2 = response.content[0].text
        st.write("Claude's reply 🧠:")
        st.markdown(r2)

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result



def perform_ai_lookup_old(
    q,
    vocal_summary=True,
    extended_refs=False,
    titles_summary=True,
    full_audio=False
):
    """
    🔮 Main routine that uses Anthropic (Claude) + optional Gradio ArXiv RAG pipeline.
    Currently demonstrates calling Anthropic and returning the text.
    """
    with PerformanceTimer("ai_lookup"):
        start = time.time()

        # ▶ Example call to Anthropic (Claude)
        client = anthropic.Anthropic(api_key=anthropic_key)
        user_input = q

        # Here we do a minimal prompt, just to show the call
        # (You can enhance your prompt engineering as needed)
        response = client.completions.create(
            model="claude-2",
            max_tokens_to_sample=512,
            prompt=f"{anthropic.HUMAN_PROMPT} {user_input}{anthropic.AI_PROMPT}"
        )

        result_text = response.completion.strip()

        # ▶ Print and store
        st.write("### Claude's reply 🧠:")
        st.markdown(result_text)

        # Save & produce audio
        #create_file(q, result_text)
        #md_file, audio_file = save_qa_with_audio(q, result_text)
        #st.subheader("📝 Main Response Audio")
        #play_and_download_audio(audio_file, st.session_state['audio_format'])

        # ▶ We'll add to the chat history
        st.session_state.chat_history.append({"user": q, "claude": result_text})

        # ▶ Return final text
        end = time.time()
        st.write(f"**Elapsed:** {end - start:.2f}s")

        return result_text

async def process_voice_input(text):
    """
    🎤 When user sends a voice query, we run the AI lookup + Q/A with audio.
    Then we store the resulting markdown & audio in session or disk.
    """
    if not text:
        return
    st.subheader("🔍 Search Results")

    # ▶ Call AI
    result = perform_ai_lookup(
        text,
        vocal_summary=True,
        extended_refs=False,
        titles_summary=True,
        full_audio=True
    )

    # ▶ Save Q&A as Markdown + audio (async)
    md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio(text, result)

    st.subheader("📝 Generated Files")
    st.write(f"**Markdown:** {md_file} (saved in {md_time:.2f}s)")
    if audio_file:
        st.write(f"**Audio:** {audio_file} (generated in {audio_time:.2f}s)")
        st.audio(audio_file)
        dl_link = create_download_link_with_cache(audio_file, file_type=st.session_state['audio_format'])
        st.markdown(dl_link, unsafe_allow_html=True)

def display_voice_tab():
    """
    🎙️ Display the voice input tab with TTS settings and real-time usage.
    """

    # ▶ Voice Settings
    st.sidebar.markdown("### 🎤 Voice Settings")
    caption_female = 'Top: 🌸 **Aria** – 🎶 **Jenny** – 🌺 **Sonia** – 🌌 **Natasha** – 🌷 **Clara**'
    caption_male = 'Bottom: 🌟 **Guy** – 🛠️ **Ryan** – 🎻 **William** – 🌟 **Liam**'

    # Optionally, replace with your own local image or comment out
    st.sidebar.image('Group Picture - Voices.png', caption=caption_female + ' | ' + caption_male)

    selected_voice = st.sidebar.selectbox(
        "👄 Select TTS Voice:",
        options=EDGE_TTS_VOICES,
        index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
    )

    st.sidebar.markdown("""
# 🎙️ Voice Character Agent Selector 🎭
*Female Voices*:
- 🌸 **Aria** – Elegant, creative storytelling
- 🎶 **Jenny** – Friendly, conversational
- 🌺 **Sonia** – Bold, confident
- 🌌 **Natasha** – Sophisticated, mysterious
- 🌷 **Clara** – Cheerful, empathetic

*Male Voices*:
- 🌟 **Guy** – Authoritative, versatile
- 🛠️ **Ryan** – Approachable, casual
- 🎻 **William** – Classic, scholarly
- 🌟 **Liam** – Energetic, engaging
""")

    # ▶ Audio Format
    st.markdown("### 🔊 Audio Format")
    selected_format = st.radio(
        "Choose Audio Format:",
        options=["MP3", "WAV"],
        index=0
    )

    # ▶ Update session state if changed
    if selected_voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = selected_voice
        st.rerun()
    if selected_format.lower() != st.session_state['audio_format']:
        st.session_state['audio_format'] = selected_format.lower()
        st.rerun()

    # ▶ Text Input
    user_text = st.text_area("💬 Message:", height=100)
    user_text = user_text.strip().replace('\n', ' ')

    # ▶ Send Button
    if st.button("📨 Send"):
        # Run our process_voice_input as an async function
        asyncio.run(process_voice_input(user_text))

    # ▶ Chat History
    st.subheader("📜 Chat History")
    for c in st.session_state.chat_history:
        st.write("**You:**", c["user"])
        st.write("**Response:**", c["claude"])

# ─────────────────────────────────────────────────────────
# FILE HISTORY SIDEBAR
# ─────────────────────────────────────────────────────────

def display_file_history_in_sidebar():
    """
    📂 Shows a history of local .md, .mp3, .wav files (newest first),
    with quick icons and optional download links.
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown("### 📂 File History")

    # ▶ Gather all files
    md_files = glob.glob("*.md")
    mp3_files = glob.glob("*.mp3")
    wav_files = glob.glob("*.wav")
    all_files = md_files + mp3_files + wav_files

    if not all_files:
        st.sidebar.write("No files found.")
        return

    # ▶ Sort newest first
    all_files = sorted(all_files, key=os.path.getmtime, reverse=True)

    #for f in all_files:
    #    fname = os.path.basename(f)
    #    ext = os.path.splitext(fname)[1].lower().strip('.')
    #    emoji = FILE_EMOJIS.get(ext, '📦')
    #    time_str = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
    #
    #    with st.sidebar.expander(f"{emoji} {fname}"):
    #        st.write(f"**Modified:** {time_str}")
    #        if ext == "md":
    #            with open(f, "r", encoding="utf-8") as file_in:
    #                snippet = file_in.read(200).replace("\n", " ")
    #                if len(snippet) == 200:
    #                    snippet += "..."
    #            st.write(snippet)
    #            dl_link = create_download_link_with_cache(f, file_type="md")
    #            st.markdown(dl_link, unsafe_allow_html=True)
    #        elif ext in ["mp3","wav"]:
    #            st.audio(f)
    #            dl_link = create_download_link_with_cache(f, file_type=ext)
    #            st.markdown(dl_link, unsafe_allow_html=True)
    #        else:
    #            dl_link = create_download_link_with_cache(f)
    #            st.markdown(dl_link, unsafe_allow_html=True)

    # Group files by their query prefix (timestamp_query)
    grouped_files = {}
    for f in all_files:
        fname = os.path.basename(f)
        prefix = '_'.join(fname.split('_')[:6])  # Get timestamp part
        if prefix not in grouped_files:
            grouped_files[prefix] = {'md': [], 'audio': [], 'loaded': False}

        ext = os.path.splitext(fname)[1].lower()
        if ext == '.md':
            grouped_files[prefix]['md'].append(f)
        elif ext in ['.mp3', '.wav']:
            grouped_files[prefix]['audio'].append(f)

    # Sort groups by timestamp (newest first)
    sorted_groups = sorted(grouped_files.items(), key=lambda x: x[0], reverse=True)

    # 🗑⬇️ Sidebar delete all and zip all download
    col1, col4 = st.sidebar.columns(2)
    with col1:
        if st.button("🗑 Delete All"):
            for f in all_files:
                os.remove(f)
            st.rerun()
            st.session_state.should_rerun = True
    with col4:
        if st.button("⬇️ Zip All"):
            zip_name = create_zip_of_files(md_files, mp3_files, wav_files,
                                           st.session_state.get('last_query', ''))
            if zip_name:
                st.sidebar.markdown(get_download_link(zip_name, "zip"),
                                    unsafe_allow_html=True)

    # Display grouped files
    for prefix, files in sorted_groups:
        # Get a preview of content from first MD file
        preview = ""
        if files['md']:
            with open(files['md'][0], "r", encoding="utf-8") as f:
                preview = f.read(200).replace("\n", " ")
                if len(preview) > 200:
                    preview += "..."

        # Create unique key for this group
        group_key = f"group_{prefix}"
        if group_key not in st.session_state:
            st.session_state[group_key] = False

        # Display group expander
        with st.sidebar.expander(f"📑 Query Group: {prefix}"):
            st.write("**Preview:**")
            st.write(preview)

            # Load full content button
            if st.button("📖 View Full Content", key=f"btn_{prefix}"):
                st.session_state[group_key] = True

            # Only show full content and audio if button was clicked
            if st.session_state[group_key]:
                # Display markdown files
                for md_file in files['md']:
                    with open(md_file, "r", encoding="utf-8") as f:
                        content = f.read()
                    st.markdown("**Full Content:**")
                    st.markdown(content)
                    st.markdown(get_download_link(md_file, file_type="md"),
                                unsafe_allow_html=True)

                # Display audio files
                usePlaySidebar = False
                if usePlaySidebar:
                    for audio_file in files['audio']:
                        ext = os.path.splitext(audio_file)[1].replace('.', '')
                        st.audio(audio_file)
                        st.markdown(get_download_link(audio_file, file_type=ext),
                                    unsafe_allow_html=True)

# ─────────────────────────────────────────────────────────
# MAIN APP
# ─────────────────────────────────────────────────────────

def main():
    # ▶ 1) Setup marquee UI in the sidebar
    update_marquee_settings_ui()
    marquee_settings = get_marquee_settings()

    # ▶ 2) Display the marquee welcome
    display_marquee(
        st.session_state['marquee_content'],
        {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
        key_suffix="welcome"
    )

    # ▶ 3) Main action tabs and model use choices
    tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
                        horizontal=True)

    useArxiv = st.checkbox("Search Arxiv for Research Paper Answers", value=True)
    useArxivAudio = st.checkbox("Generate Audio File for Research Paper Answers", value=False)

    # ▶ 4) Show or hide custom component (optional example)
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello from MyComponent")

    if val:
        val_stripped = val.replace('\\n', ' ')
        edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
        run_option = st.selectbox("Model:", ["Arxiv", "Other (demo)"])
        col1, col2 = st.columns(2)
        with col1:
            autorun = st.checkbox("⚙ AutoRun", value=True)
        with col2:
            full_audio = st.checkbox("📚FullAudio", value=False)

        input_changed = (val != st.session_state.old_val)

        if autorun and input_changed:
            st.session_state.old_val = val
            st.session_state.last_query = edited_input
            perform_ai_lookup(edited_input,
                              vocal_summary=True,
                              extended_refs=False,
                              titles_summary=True,
                              full_audio=full_audio, useArxiv=useArxiv, useArxivAudio=useArxivAudio)
        else:
            if st.button("▶ Run"):
                st.session_state.old_val = val
                st.session_state.last_query = edited_input
                perform_ai_lookup(edited_input,
                                  vocal_summary=True,
                                  extended_refs=False,
                                  titles_summary=True,
                                  full_audio=full_audio, useArxiv=useArxiv, useArxivAudio=useArxivAudio)

    # ─────────────────────────────────────────────────────────
    # TAB: ArXiv
    # ─────────────────────────────────────────────────────────
    if tab_main == "🔍 ArXiv":
        st.subheader("🔍 Query ArXiv")
        q = st.text_input("🔍 Query:", key="arxiv_query")

        st.markdown("### 🎛 Options")
        vocal_summary = st.checkbox("🎙ShortAudio", value=True, key="option_vocal_summary")
        extended_refs = st.checkbox("📜LongRefs", value=False, key="option_extended_refs")
        titles_summary = st.checkbox("🔖TitlesOnly", value=True, key="option_titles_summary")
        full_audio = st.checkbox("📚FullAudio", value=False, key="option_full_audio")
        full_transcript = st.checkbox("🧾FullTranscript", value=False, key="option_full_transcript")

        if q and st.button("🔍Run"):
            st.session_state.last_query = q
            result = perform_ai_lookup(q,
                                       vocal_summary=vocal_summary,
                                       extended_refs=extended_refs,
                                       titles_summary=titles_summary,
                                       full_audio=full_audio)
            if full_transcript:
                create_file(q, result, "md")

    # ─────────────────────────────────────────────────────────
    # TAB: Voice
    # ─────────────────────────────────────────────────────────
    elif tab_main == "🎤 Voice":
        display_voice_tab()

    # ─────────────────────────────────────────────────────────
    # TAB: Media
    # ─────────────────────────────────────────────────────────
    elif tab_main == "📸 Media":
        st.header("📸 Media Gallery")
        tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])

        # ▶ AUDIO sub-tab
        with tabs[0]:
            st.subheader("🎵 Audio Files")
            audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
            if audio_files:
                for a in audio_files:
                    with st.expander(os.path.basename(a)):
                        st.audio(a)
                        ext = os.path.splitext(a)[1].replace('.', '')
                        dl_link = create_download_link_with_cache(a, file_type=ext)
                        st.markdown(dl_link, unsafe_allow_html=True)
            else:
                st.write("No audio files found.")

        # ▶ IMAGES sub-tab
        with tabs[1]:
            st.subheader("🖼 Image Files")
            imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
            if imgs:
                c = st.slider("Cols", 1, 5, 3, key="cols_images")
                cols = st.columns(c)
                for i, f in enumerate(imgs):
                    with cols[i % c]:
                        st.image(Image.open(f), use_container_width=True)
            else:
                st.write("No images found.")

        # ▶ VIDEO sub-tab
        with tabs[2]:
            st.subheader("🎥 Video Files")
            vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
            if vids:
                for v in vids:
                    with st.expander(os.path.basename(v)):
                        st.video(v)
            else:
                st.write("No videos found.")

    # ─────────────────────────────────────────────────────────
    # TAB: Editor
    # ─────────────────────────────────────────────────────────
    elif tab_main == "📝 Editor":
        st.write("### 📝 File Editor (Minimal Demo)")
        st.write("Select or create a file to edit. More advanced features can be added as needed.")

    # ─────────────────────────────────────────────────────────
    # SIDEBAR: FILE HISTORY + PERFORMANCE METRICS
    # ─────────────────────────────────────────────────────────
    display_file_history_in_sidebar()
    log_performance_metrics()

    # ▶ Some light CSS styling
    st.markdown("""
    <style>
        .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
        .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
        .stButton>button { margin-right: 0.5rem; }
    </style>
    """, unsafe_allow_html=True)

    # ▶ Rerun if needed
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()

# ─────────────────────────────────────────────────────────
# 8. RUN APP
# ─────────────────────────────────────────────────────────

if __name__ == "__main__":
    main()