import json import streamlit as st import re import fitz # PyMuPDF # Function to construct the prompt def construct_prompt(prompt_template: str, user_input: str) -> str: return prompt_template.format(user_input=user_input) def construct_prompt_for_pdf(prompt_template: str, pdf_input: str) -> str: return prompt_template.format(pdf_input=pdf_input) def parse_json_content(cleaned_content: str): """ Parses the cleaned content to extract valid JSON data. Args: cleaned_content (str): The raw content containing JSON data. Returns: dict or list: The parsed JSON object. """ import re # Step 1: Strip unwanted characters and clean the content cleaned_content = cleaned_content.strip() # Step 2: Use regex to extract only the valid JSON block (e.g., starts with [ or {) json_match = re.search(r"(\{.*\}|\[.*\])", cleaned_content, re.DOTALL) if not json_match: raise ValueError("No valid JSON found in the content.") # Step 3: Extract and parse the valid JSON valid_json = json_match.group(0) # Extract matched JSON block try: extracted_data = json.loads(valid_json) except json.JSONDecodeError as e: raise ValueError(f"Failed to decode JSON. Error: {e}\nContent:\n{valid_json}") return extracted_data # Function to extract and parse JSON response def extract_llm_response(response): """ Extracts and parses the JSON response from the API. Args: response (dict): The API response containing a hint and proposals. Returns: tuple: A tuple containing the objective (str), key results (list), and hint (str). """ print("RESPONSE:",response) raw_message_content = response["choices"][0]["message"]["content"] print("raw_message_content:", raw_message_content) # Clean and parse the JSON content cleaned_content = raw_message_content.replace("`", "").split("json")[-1] print("cleaned content", cleaned_content) parsed_data = parse_json_content(cleaned_content=cleaned_content) print("parsed_data:",parsed_data) hint = parsed_data.get("hint", "") proposals = parsed_data.get("proposals", []) if proposals: # Extract the first proposal's objective and key results first_proposal = proposals[0] # Get the first proposal (assuming it's a list) objective = first_proposal.get("objective", "") key_results = first_proposal.get("key_results", []) else: objective = "" key_results = [] #print("debug:", parsed_data.get("objective", "")) return objective, key_results, hint #try: # Extract hint from the response hint = response.get("hint", "") # Extract proposals from the response proposals = response.get("proposals", []) print("hint:", hint) print("proposals:", proposals) # Check if proposals are available if proposals: # Extract the first proposal's objective and key results first_proposal = proposals[0] # Get the first proposal (assuming it's a list) objective = first_proposal.get("objective", "") key_results = first_proposal.get("key_results", []) else: objective = "" key_results = [] # Log parsed data for debugging print("parsed_data:", {"objective": objective, "key_results": key_results, "hint": hint}) return objective, key_results, hint #except Exception as e: # print(f"Error parsing API response: {e}") # return "", [], "" def extract_text_from_pdf(pdf_path): """Extract text from a PDF file.""" doc = fitz.open(pdf_path) text = "" for page in doc: text += page.get_text() return text def chunk_text(text, max_chars=3000): """Split text into smaller chunks.""" chunks = [] while len(text) > max_chars: split_index = text[:max_chars].rfind("\n") # Split at the nearest newline if split_index == -1: # No newline found split_index = max_chars chunks.append(text[:split_index]) text = text[split_index:] chunks.append(text) return chunks