awacke1 commited on
Commit
6641937
·
verified ·
1 Parent(s): 8ea4fb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -10
app.py CHANGED
@@ -165,7 +165,6 @@ def generate_filename(prompt, response, file_type="md"):
165
  snippet_cleaned = clean_text_for_filename(snippet)
166
 
167
  # Combine info terms and snippet
168
- # Prioritize info terms in front
169
  name_parts = info_terms + [snippet_cleaned]
170
  full_name = '_'.join(name_parts)
171
 
@@ -271,7 +270,8 @@ def process_video(video_path, seconds_per_frame=1):
271
  for i in range(0, total, skip):
272
  vid.set(cv2.CAP_PROP_POS_FRAMES, i)
273
  ret, frame = vid.read()
274
- if not ret: break
 
275
  _, buf = cv2.imencode(".jpg", frame)
276
  frames_b64.append(base64.b64encode(buf).decode("utf-8"))
277
  vid.release()
@@ -298,18 +298,72 @@ def save_full_transcript(query, text):
298
  """Save full transcript of Arxiv results as a file."""
299
  create_file(query, text, "md")
300
 
301
- def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
302
- """Perform Arxiv search and generate audio summaries"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  start = time.time()
 
 
304
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
305
  refs = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")[0]
306
  r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
307
 
 
308
  result = f"### ๐Ÿ”Ž {q}\n\n{r2}\n\n{refs}"
309
-
310
  st.markdown(result)
311
 
312
- # Generate full audio version if requested
313
  if full_audio:
314
  complete_text = f"Complete response for query: {q}. {clean_for_speech(r2)} {clean_for_speech(refs)}"
315
  audio_file_full = speak_with_edge_tts(complete_text)
@@ -329,7 +383,41 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary
329
  st.write("### ๐Ÿ“œ Long Refs")
330
  play_and_download_audio(audio_file_refs)
331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  if titles_summary:
 
333
  titles = []
334
  for line in refs.split('\n'):
335
  m = re.search(r"\[([^\]]+)\]", line)
@@ -339,7 +427,7 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary
339
  titles_text = "Titles: " + ", ".join(titles)
340
  titles_text = clean_for_speech(titles_text)
341
  audio_file_titles = speak_with_edge_tts(titles_text)
342
- st.write("### ๐Ÿ”– Titles")
343
  play_and_download_audio(audio_file_titles)
344
 
345
  elapsed = time.time()-start
@@ -352,7 +440,8 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary
352
 
353
  def process_with_gpt(text):
354
  """Process text with GPT-4"""
355
- if not text: return
 
356
  st.session_state.messages.append({"role":"user","content":text})
357
  with st.chat_message("user"):
358
  st.markdown(text)
@@ -370,7 +459,8 @@ def process_with_gpt(text):
370
 
371
  def process_with_claude(text):
372
  """Process text with Claude"""
373
- if not text: return
 
374
  with st.chat_message("user"):
375
  st.markdown(text)
376
  with st.chat_message("assistant"):
@@ -568,7 +658,6 @@ def main():
568
  if full_transcript:
569
  save_full_transcript(q_new, result)
570
 
571
-
572
  elif tab_main == "๐ŸŽค Voice":
573
  st.subheader("๐ŸŽค Voice Input")
574
  user_text = st.text_area("๐Ÿ’ฌ Message:", height=100)
 
165
  snippet_cleaned = clean_text_for_filename(snippet)
166
 
167
  # Combine info terms and snippet
 
168
  name_parts = info_terms + [snippet_cleaned]
169
  full_name = '_'.join(name_parts)
170
 
 
270
  for i in range(0, total, skip):
271
  vid.set(cv2.CAP_PROP_POS_FRAMES, i)
272
  ret, frame = vid.read()
273
+ if not ret:
274
+ break
275
  _, buf = cv2.imencode(".jpg", frame)
276
  frames_b64.append(base64.b64encode(buf).decode("utf-8"))
277
  vid.release()
 
298
  """Save full transcript of Arxiv results as a file."""
299
  create_file(query, text, "md")
300
 
301
+ # ------------------------------
302
+ # NEW: Helper to parse references
303
+ # ------------------------------
304
def parse_arxiv_refs(ref_text: str):
    """
    Parse the multi-line references returned by the RAG pipeline.

    Typical input lines look like:
        1) [Paper Title 2023] This is the summary ...
        2) [Another Title (2024)] Another summary text ...

    A 4-digit year (20xx) is guessed from the bracketed title first,
    then from the summary as a fallback.

    Args:
        ref_text: Raw multi-line reference text from the RAG pipeline.

    Returns:
        List of dicts: {'title': str, 'summary': str, 'year': int or None}.
        Lines with no bracketed title get title "No Title" and the whole
        line as the summary; blank lines are skipped.
    """
    results = []
    for line in ref_text.split('\n'):
        line = line.strip()
        if not line:
            continue

        # Find the bracketed [Title ...] portion, if any.
        title_match = re.search(r"\[([^\]]+)\]", line)
        if title_match:
            raw_title = title_match.group(1).strip()
            # FIX: remove only the matched bracket span by slicing.
            # The previous str.replace(...) removed *every* occurrence of
            # the bracketed text, which could also delete legitimate
            # repeats of it inside the summary.
            summary = (line[:title_match.start()] + line[title_match.end():]).strip()
        else:
            # No bracket found: treat the entire line as the summary.
            raw_title = "No Title"
            summary = line

        # Guess a publication year: title first, then summary fallback.
        year_match = (re.search(r'(20\d{2})', raw_title)
                      or re.search(r'(20\d{2})', summary))
        year = int(year_match.group(1)) if year_match else None

        results.append({
            'title': raw_title,
            'summary': summary,
            'year': year
        })
    return results
350
+
351
+
352
+ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
353
+ titles_summary=True, full_audio=False):
354
+ """Perform Arxiv search and generate audio summaries."""
355
  start = time.time()
356
+
357
+ # ๐ŸŽฏ 1) Query the HF RAG pipeline
358
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
359
  refs = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")[0]
360
  r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
361
 
362
+ # ๐ŸŽฏ 2) Combine for final text output
363
  result = f"### ๐Ÿ”Ž {q}\n\n{r2}\n\n{refs}"
 
364
  st.markdown(result)
365
 
366
+ # ๐ŸŽฏ 3) Generate "all at once" audio if requested
367
  if full_audio:
368
  complete_text = f"Complete response for query: {q}. {clean_for_speech(r2)} {clean_for_speech(refs)}"
369
  audio_file_full = speak_with_edge_tts(complete_text)
 
383
  st.write("### ๐Ÿ“œ Long Refs")
384
  play_and_download_audio(audio_file_refs)
385
 
386
+ # --------------------------------------
387
+ # NEW: Parse references, show sorted list
388
+ # --------------------------------------
389
+ parsed_refs = parse_arxiv_refs(refs)
390
+
391
+ # Sort by year descending (put None at bottom)
392
+ # If you want to skip older than 2022, you can filter them:
393
+ # parsed_refs = [r for r in parsed_refs if (r["year"] is not None and r["year"] >= 2022)]
394
+ parsed_refs.sort(key=lambda x: x["year"] if x["year"] else 0, reverse=True)
395
+
396
+ st.write("## Individual Papers (Most Recent First)")
397
+ for idx, paper in enumerate(parsed_refs):
398
+ year_str = paper["year"] if paper["year"] else "Unknown Year"
399
+ st.markdown(f"**{idx+1}. {paper['title']}** \n*Year:* {year_str}")
400
+ st.markdown(f"*Summary:* {paper['summary']}")
401
+
402
+ # Two new TTS buttons: Title only or Title+Summary
403
+ colA, colB = st.columns(2)
404
+ with colA:
405
+ if st.button(f"๐Ÿ”Š Title", key=f"title_{idx}"):
406
+ text_tts = clean_for_speech(paper['title'])
407
+ audio_file_title = speak_with_edge_tts(text_tts)
408
+ play_and_download_audio(audio_file_title)
409
+
410
+ with colB:
411
+ if st.button(f"๐Ÿ”Š Title+Summary", key=f"summary_{idx}"):
412
+ text_tts = clean_for_speech(paper['title'] + ". " + paper['summary'])
413
+ audio_file_title_summary = speak_with_edge_tts(text_tts)
414
+ play_and_download_audio(audio_file_title_summary)
415
+
416
+ st.write("---")
417
+
418
+ # Keep your original block for "Titles Only" if you want:
419
  if titles_summary:
420
+ # This is your existing code block
421
  titles = []
422
  for line in refs.split('\n'):
423
  m = re.search(r"\[([^\]]+)\]", line)
 
427
  titles_text = "Titles: " + ", ".join(titles)
428
  titles_text = clean_for_speech(titles_text)
429
  audio_file_titles = speak_with_edge_tts(titles_text)
430
+ st.write("### ๐Ÿ”– Titles (All-In-One)")
431
  play_and_download_audio(audio_file_titles)
432
 
433
  elapsed = time.time()-start
 
440
 
441
  def process_with_gpt(text):
442
  """Process text with GPT-4"""
443
+ if not text:
444
+ return
445
  st.session_state.messages.append({"role":"user","content":text})
446
  with st.chat_message("user"):
447
  st.markdown(text)
 
459
 
460
  def process_with_claude(text):
461
  """Process text with Claude"""
462
+ if not text:
463
+ return
464
  with st.chat_message("user"):
465
  st.markdown(text)
466
  with st.chat_message("assistant"):
 
658
  if full_transcript:
659
  save_full_transcript(q_new, result)
660
 
 
661
  elif tab_main == "๐ŸŽค Voice":
662
  st.subheader("๐ŸŽค Voice Input")
663
  user_text = st.text_area("๐Ÿ’ฌ Message:", height=100)