Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -165,7 +165,6 @@ def generate_filename(prompt, response, file_type="md"):
|
|
165 |
snippet_cleaned = clean_text_for_filename(snippet)
|
166 |
|
167 |
# Combine info terms and snippet
|
168 |
-
# Prioritize info terms in front
|
169 |
name_parts = info_terms + [snippet_cleaned]
|
170 |
full_name = '_'.join(name_parts)
|
171 |
|
@@ -271,7 +270,8 @@ def process_video(video_path, seconds_per_frame=1):
|
|
271 |
for i in range(0, total, skip):
|
272 |
vid.set(cv2.CAP_PROP_POS_FRAMES, i)
|
273 |
ret, frame = vid.read()
|
274 |
-
if not ret:
|
|
|
275 |
_, buf = cv2.imencode(".jpg", frame)
|
276 |
frames_b64.append(base64.b64encode(buf).decode("utf-8"))
|
277 |
vid.release()
|
@@ -298,18 +298,72 @@ def save_full_transcript(query, text):
|
|
298 |
"""Save full transcript of Arxiv results as a file."""
|
299 |
create_file(query, text, "md")
|
300 |
|
301 |
-
|
302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
start = time.time()
|
|
|
|
|
304 |
client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
|
305 |
refs = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")[0]
|
306 |
r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
|
307 |
|
|
|
308 |
result = f"### ๐ {q}\n\n{r2}\n\n{refs}"
|
309 |
-
|
310 |
st.markdown(result)
|
311 |
|
312 |
-
# Generate
|
313 |
if full_audio:
|
314 |
complete_text = f"Complete response for query: {q}. {clean_for_speech(r2)} {clean_for_speech(refs)}"
|
315 |
audio_file_full = speak_with_edge_tts(complete_text)
|
@@ -329,7 +383,41 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary
|
|
329 |
st.write("### ๐ Long Refs")
|
330 |
play_and_download_audio(audio_file_refs)
|
331 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
if titles_summary:
|
|
|
333 |
titles = []
|
334 |
for line in refs.split('\n'):
|
335 |
m = re.search(r"\[([^\]]+)\]", line)
|
@@ -339,7 +427,7 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary
|
|
339 |
titles_text = "Titles: " + ", ".join(titles)
|
340 |
titles_text = clean_for_speech(titles_text)
|
341 |
audio_file_titles = speak_with_edge_tts(titles_text)
|
342 |
-
st.write("### ๐ Titles")
|
343 |
play_and_download_audio(audio_file_titles)
|
344 |
|
345 |
elapsed = time.time()-start
|
@@ -352,7 +440,8 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary
|
|
352 |
|
353 |
def process_with_gpt(text):
|
354 |
"""Process text with GPT-4"""
|
355 |
-
if not text:
|
|
|
356 |
st.session_state.messages.append({"role":"user","content":text})
|
357 |
with st.chat_message("user"):
|
358 |
st.markdown(text)
|
@@ -370,7 +459,8 @@ def process_with_gpt(text):
|
|
370 |
|
371 |
def process_with_claude(text):
|
372 |
"""Process text with Claude"""
|
373 |
-
if not text:
|
|
|
374 |
with st.chat_message("user"):
|
375 |
st.markdown(text)
|
376 |
with st.chat_message("assistant"):
|
@@ -568,7 +658,6 @@ def main():
|
|
568 |
if full_transcript:
|
569 |
save_full_transcript(q_new, result)
|
570 |
|
571 |
-
|
572 |
elif tab_main == "๐ค Voice":
|
573 |
st.subheader("๐ค Voice Input")
|
574 |
user_text = st.text_area("๐ฌ Message:", height=100)
|
|
|
165 |
snippet_cleaned = clean_text_for_filename(snippet)
|
166 |
|
167 |
# Combine info terms and snippet
|
|
|
168 |
name_parts = info_terms + [snippet_cleaned]
|
169 |
full_name = '_'.join(name_parts)
|
170 |
|
|
|
270 |
for i in range(0, total, skip):
|
271 |
vid.set(cv2.CAP_PROP_POS_FRAMES, i)
|
272 |
ret, frame = vid.read()
|
273 |
+
if not ret:
|
274 |
+
break
|
275 |
_, buf = cv2.imencode(".jpg", frame)
|
276 |
frames_b64.append(base64.b64encode(buf).decode("utf-8"))
|
277 |
vid.release()
|
|
|
298 |
"""Save full transcript of Arxiv results as a file."""
|
299 |
create_file(query, text, "md")
|
300 |
|
301 |
+
# ------------------------------
|
302 |
+
# NEW: Helper to parse references
|
303 |
+
# ------------------------------
|
304 |
+
def parse_arxiv_refs(ref_text: str):
    """
    Parse the multi-line references returned by the RAG pipeline.

    Typical format lines like:
        1) [Paper Title 2023] This is the summary ...
        2) [Another Title (2024)] Another summary text ...

    For every non-blank line:
      * title   - text inside the first ``[...]`` pair, or "No Title" when
                  the line contains no bracketed text.
      * summary - the line with that first bracketed span removed (text in
                  front of the bracket, such as an enumerator, is kept);
                  the whole line when there is no bracket.
      * year    - first standalone 4-digit ``20xx`` number found in the
                  title, falling back to the summary; None if neither has one.

    Args:
        ref_text: Raw reference text, one reference per line. ``None`` or an
            empty string yields an empty list.

    Returns:
        List of dicts: { 'title': str, 'summary': str, 'year': int or None },
        in the same order as the input lines.
    """
    if not ref_text:
        return []

    # Compile once, outside the per-line loop.
    title_pattern = re.compile(r"\[([^\]]+)\]")
    # Digit look-arounds keep "20xx" from matching inside a longer number
    # (e.g. "120245" or the tail of an arXiv id such as "2301.12034").
    year_pattern = re.compile(r"(?<!\d)(20\d{2})(?!\d)")

    results = []
    for line in ref_text.split('\n'):
        line = line.strip()
        if not line:
            continue

        title_match = title_pattern.search(line)
        if title_match:
            raw_title = title_match.group(1).strip()
            # Cut out only the matched span; str.replace() would also strip
            # any later repetition of the same "[...]" text from the summary.
            summary = (line[:title_match.start()] + line[title_match.end():]).strip()
        else:
            # No bracket found: treat the entire line as summary.
            raw_title = "No Title"
            summary = line

        # Prefer a year in the title, then fall back to the summary.
        year_match = year_pattern.search(raw_title) or year_pattern.search(summary)
        year = int(year_match.group(1)) if year_match else None

        results.append({
            'title': raw_title,
            'summary': summary,
            'year': year,
        })
    return results
|
350 |
+
|
351 |
+
|
352 |
+
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
|
353 |
+
titles_summary=True, full_audio=False):
|
354 |
+
"""Perform Arxiv search and generate audio summaries."""
|
355 |
start = time.time()
|
356 |
+
|
357 |
+
# ๐ฏ 1) Query the HF RAG pipeline
|
358 |
client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
|
359 |
refs = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")[0]
|
360 |
r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
|
361 |
|
362 |
+
# ๐ฏ 2) Combine for final text output
|
363 |
result = f"### ๐ {q}\n\n{r2}\n\n{refs}"
|
|
|
364 |
st.markdown(result)
|
365 |
|
366 |
+
# ๐ฏ 3) Generate "all at once" audio if requested
|
367 |
if full_audio:
|
368 |
complete_text = f"Complete response for query: {q}. {clean_for_speech(r2)} {clean_for_speech(refs)}"
|
369 |
audio_file_full = speak_with_edge_tts(complete_text)
|
|
|
383 |
st.write("### ๐ Long Refs")
|
384 |
play_and_download_audio(audio_file_refs)
|
385 |
|
386 |
+
# --------------------------------------
|
387 |
+
# NEW: Parse references, show sorted list
|
388 |
+
# --------------------------------------
|
389 |
+
parsed_refs = parse_arxiv_refs(refs)
|
390 |
+
|
391 |
+
# Sort by year descending (put None at bottom)
|
392 |
+
# If you want to skip older than 2022, you can filter them:
|
393 |
+
# parsed_refs = [r for r in parsed_refs if (r["year"] is not None and r["year"] >= 2022)]
|
394 |
+
parsed_refs.sort(key=lambda x: x["year"] if x["year"] else 0, reverse=True)
|
395 |
+
|
396 |
+
st.write("## Individual Papers (Most Recent First)")
|
397 |
+
for idx, paper in enumerate(parsed_refs):
|
398 |
+
year_str = paper["year"] if paper["year"] else "Unknown Year"
|
399 |
+
st.markdown(f"**{idx+1}. {paper['title']}** \n*Year:* {year_str}")
|
400 |
+
st.markdown(f"*Summary:* {paper['summary']}")
|
401 |
+
|
402 |
+
# Two new TTS buttons: Title only or Title+Summary
|
403 |
+
colA, colB = st.columns(2)
|
404 |
+
with colA:
|
405 |
+
if st.button(f"๐ Title", key=f"title_{idx}"):
|
406 |
+
text_tts = clean_for_speech(paper['title'])
|
407 |
+
audio_file_title = speak_with_edge_tts(text_tts)
|
408 |
+
play_and_download_audio(audio_file_title)
|
409 |
+
|
410 |
+
with colB:
|
411 |
+
if st.button(f"๐ Title+Summary", key=f"summary_{idx}"):
|
412 |
+
text_tts = clean_for_speech(paper['title'] + ". " + paper['summary'])
|
413 |
+
audio_file_title_summary = speak_with_edge_tts(text_tts)
|
414 |
+
play_and_download_audio(audio_file_title_summary)
|
415 |
+
|
416 |
+
st.write("---")
|
417 |
+
|
418 |
+
# Keep your original block for "Titles Only" if you want:
|
419 |
if titles_summary:
|
420 |
+
# This is your existing code block
|
421 |
titles = []
|
422 |
for line in refs.split('\n'):
|
423 |
m = re.search(r"\[([^\]]+)\]", line)
|
|
|
427 |
titles_text = "Titles: " + ", ".join(titles)
|
428 |
titles_text = clean_for_speech(titles_text)
|
429 |
audio_file_titles = speak_with_edge_tts(titles_text)
|
430 |
+
st.write("### ๐ Titles (All-In-One)")
|
431 |
play_and_download_audio(audio_file_titles)
|
432 |
|
433 |
elapsed = time.time()-start
|
|
|
440 |
|
441 |
def process_with_gpt(text):
|
442 |
"""Process text with GPT-4"""
|
443 |
+
if not text:
|
444 |
+
return
|
445 |
st.session_state.messages.append({"role":"user","content":text})
|
446 |
with st.chat_message("user"):
|
447 |
st.markdown(text)
|
|
|
459 |
|
460 |
def process_with_claude(text):
|
461 |
"""Process text with Claude"""
|
462 |
+
if not text:
|
463 |
+
return
|
464 |
with st.chat_message("user"):
|
465 |
st.markdown(text)
|
466 |
with st.chat_message("assistant"):
|
|
|
658 |
if full_transcript:
|
659 |
save_full_transcript(q_new, result)
|
660 |
|
|
|
661 |
elif tab_main == "๐ค Voice":
|
662 |
st.subheader("๐ค Voice Input")
|
663 |
user_text = st.text_area("๐ฌ Message:", height=100)
|