Update parameter extraction; add LLM solver and feedback functions

levi-accherman · levi-accherman · commit b00e162100b6 · 2025-11-10T11:58:53.000Z
diff --git a/app/LLM_solver.py b/app/LLM_solver.py
@@ -0,0 +1,58 @@
+from typing import Any, TypedDict
+import os
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+import re
+import requests
+import base64
+
+
+class Params(TypedDict):
+    """Empty typed dict: no keys are declared."""
+
+
+# Typed dict with a single "preview" key whose value may be of any type.
+Result = TypedDict("Result", {"preview": Any})
+
+def LLM_solve(question_txt: str, input_type: str, pre_response_txt: str, post_response_txt: str) -> str:
+    """Ask the configured OpenAI chat model one question and return its reply, stripped.
+
+    Calls load_dotenv(), then reads OPENAI_MODEL and OPENAI_API_KEY from os.environ.
+    """
+    load_dotenv()
+    model_name, secret = os.environ['OPENAI_MODEL'], os.environ["OPENAI_API_KEY"]
+    chat = ChatOpenAI(model=model_name, api_key=secret)
+    # The four arguments are interpolated verbatim into the template below.
+    request_text = fr"""
+    Follow these steps carefully:
+
+    A question text and its "input type" are given at the end of this prompt.
+    The task is to answer the question only, without any additional explanation.
+    The question is in the topic of either mathematics or science.
+    The "input type" can be one of the following:
+    BOOLEAN, EXPRESSION, MATRIX, MULTIPLE_CHOICE, NUMBER, NUMERIC_UNITS, TEXT.
+
+    For BOOLEAN type, answer either "True" or "False" only.
+    For EXPRESSION type, answer with a mathematical expression in LaTeX format.
+    For MATRIX type, answer with in LaTeX format the expressions or numbers for each of the element, from left to right and from top to bottom.
+    For MULTIPLE_CHOICE type, answer with either 1st, 2nd, 3rd, or 4th only.
+    For NUMBER type, answer with a number only.
+    For NUMERIC_UNITS type, answer with a number followed by a space and the unit, e.g., "9.8 m/s^2".
+    For TEXT type, answer with a short text  without any explanation. The answer is usually 2 words or less.
+
+    The "Pre response text" and "Post response text" are also given to you at the end of this prompt to help you understand the context.
+    For example, if the answer is "x=yz" and "Pre response text" is "x=", then you should answer with "yz" only.
+
+    Question text:
+    {question_txt}
+    Input type:
+    {input_type}
+    Pre response text:
+    {pre_response_txt}
+    Post response text:
+    {post_response_txt}
+
+    Now answer the question.
+    """
+    reply = chat.invoke(request_text)
+    return reply.content.strip()
diff --git a/app/LLM_solver_testing.py b/app/LLM_solver_testing.py
@@ -0,0 +1,27 @@
+from LLM_solver import LLM_solve
+
+Q_list = [  # rows: (question, input type); not iterated by the loop at the bottom of this script
+    ["Does water freeze at 0 degrees Celsius?", "BOOLEAN"],
+    ["Is an electron heavier than a proton?", "BOOLEAN"],
+    ["Solve for x. x^2 - 4 = 0", "EXPRESSION"],
+    ["d/dx (x^3)", "EXPRESSION"],
+    ["Write the unitary 2x2 matrix", "MATRIX"],
+    ["Transpose [[1,2,3],[4,5,6]]", "MATRIX"],
+    ["What is the largest planet in our solar system? (a) Earth (b) Jupiter (c) Saturn (d) Venus", "MULTIPLE_CHOICE"],
+    ["What is the chemical symbol for oxygen? (a) O (b) H (c) C (d) N", "MULTIPLE_CHOICE"],
+    ["5 + 7", "NUMBER"],
+    ["Square root of 9", "NUMBER"],
+    ["What is the acceleration due to gravity on Earth in m/s^2?", "NUMERIC_UNITS"],
+    ["Speed of light in vacuum in m/s?", "NUMERIC_UNITS"],
+    ["What is the satellite of Earth?", "TEXT"],
+    ["What is the chemical formula for water?", "TEXT"],
+]
+
+Q_list_fmx3 = [  # rows: (question, input type, pre-response text, post-response text)
+    ["Use elementary geometry to derive the rotation matrix that converts the Cartesian vector components (u₁, u₂, u₃) into cylindrical polar components (Vᵣ, Vθ, Vz). Write the 3x3 linear vector component transformation matrix.",
+     "MATRIX", r"$\begin{pmatrix}V_{r} \\ V_{\theta} \\ V_{z} \end{pmatrix}=$", r"$\begin{pmatrix}u_{1} \\ u_{2} \\ u_{3} \end{pmatrix}$"],
+]
+
+# LaTeX above is written as raw strings so \b, \t and \\ are not interpreted as escape sequences.
+for q, t, pre, post in Q_list_fmx3:
+    print(LLM_solve(q, t, pre, post))
diff --git a/app/evaluate_with_feedback.py b/app/evaluate_with_feedback.py
@@ -0,0 +1,100 @@
+import re
+from typing import Any, TypedDict
+import os
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+
+
+def parse_markdown_with_images(markdown_text: str) -> list:
+    """Split markdown into chat content parts: text runs and ``![alt](url)`` images, in source order.
+
+    Returns a list of ``{"type": "text", "text": ...}`` and ``{"type": "image_url", "image_url": {"url": ...}}``
+    dicts. Text runs are stripped and dropped when empty; an optional image title
+    (``![alt](url "title")``) is discarded so only the URL is forwarded.
+    """
+    parts, cursor = [], 0
+    for found in re.finditer(r'!\[.*?\]\((.*?)\)', markdown_text):
+        leading = markdown_text[cursor:found.start()].strip()
+        if leading:
+            parts.append({"type": "text", "text": leading})
+        target = found.group(1).strip()
+        # The destination is the first whitespace-delimited token; anything after it is the title.
+        url = target.split(maxsplit=1)[0] if target else target
+        parts.append({"type": "image_url", "image_url": {"url": url}})
+        cursor = found.end()
+    trailing = markdown_text[cursor:].strip()
+    if trailing:
+        parts.append({"type": "text", "text": trailing})
+    return parts
+
+
+def eval_with_feedback(
+    question_markdown: str,
+    part_markdown: str,
+    pre_response_text: str,
+    student_answer: str,
+    post_response_text: str,
+    correct_answer: str,
+) -> str:
+    """Ask the chat model for one short feedback sentence on a student's answer.
+
+    The question and part markdown are split into text and image parts by
+    parse_markdown_with_images; the pre/post response text, the student's answer
+    and the correct answer are interpolated into the instruction prompt. All parts
+    are sent as a single user message and the reply content is returned stripped.
+
+    Reads OPENAI_MODEL and OPENAI_API_KEY from os.environ after calling load_dotenv().
+    """
+    load_dotenv()
+
+    # The configured model must support image input (e.g. gpt-4o, gpt-5).
+    model_name = os.environ["OPENAI_MODEL"]
+    chat = ChatOpenAI(model=model_name, api_key=os.environ["OPENAI_API_KEY"])
+
+    # Split both markdown sources up front so text and images keep their order.
+    question_parts = parse_markdown_with_images(question_markdown)
+    sub_part_parts = parse_markdown_with_images(part_markdown)
+
+    # Feedback generation instruction prompt
+    feedback_prompt = fr"""
+Follow these steps carefully:
+
+You are given:
+- A question and its sub-part (each may include diagrams or equations).
+- The pre-response text and post-response text that appear around the student's answer box.
+- The student's answer and the correct answer.
+
+Your task:
+1. Understand the problem statement and its context (including the question, part, and images).
+2. Analyze the reasoning that leads from the question to the correct answer.
+3. Identify *why* the student’s answer might differ (conceptual misunderstanding, skipped step, sign/unit error, etc.).
+4. Write one **short, indirect feedback sentence** that:
+   - Encourages the student to rethink that specific step or concept (thought trigger), and
+   - Refers to the relevant mathematical action or context (action trigger).
+5. Do NOT reveal the correct formula or result.
+
+Guidelines:
+- Use imperative mood: "Re-examine...", "Review...", "Reconsider...", "Verify...".
+- Mention a specific step or operation, e.g. "when integrating", "when substituting", "when solving for x".
+- Keep it concise (max 15 words).
+- Be constructive and professional.
+
+Now, generate only the final feedback sentence.
+
+Pre-response text: {pre_response_text}
+Student's answer (LaTeX): {student_answer}
+Post-response text: {post_response_text}
+Correct answer (LaTeX): {correct_answer}
+
+Output only the feedback sentence.
+"""
+
+    # Assemble the message in order: main question, sub-part, then the instructions.
+    message_parts = [{"type": "text", "text": "Main question:"}]
+    message_parts.extend(question_parts)
+    message_parts.append({"type": "text", "text": "\nSub-part:"})
+    message_parts.extend(sub_part_parts)
+    message_parts.append({"type": "text", "text": feedback_prompt})
+
+    reply = chat.invoke([{"role": "user", "content": message_parts}])
+    return reply.content.strip()
diff --git a/app/evaluate_with_feedback_testing.py b/app/evaluate_with_feedback_testing.py
@@ -0,0 +1,32 @@
+from evaluate_with_feedback import eval_with_feedback
+
+question_markdown = r"""
+Unless otherwise stated, assume standard atmosphere values of $\rho=1.225 \mathrm{~kg} / \mathrm{m}^{3}, \mu=1.79 \times 10^{-5} \mathrm{~kg} /(\mathrm{ms}), R=287.1 \mathrm{~J} /(\mathrm{kgK})$ and $\gamma=1.4$.
+
+A model of the real, separated flow around a circular cylinder is to approximate it as potential flow up to the separation point $\theta=\theta_{s}$, so that for $\theta_{s} \leq \theta \leq \pi$ :
+
+$$
+\phi=U_{\infty}\left(r+\frac{R^{2}}{r}\right) \cos \theta
+$$
+
+Beyond the separation point ( $0 \leq \theta<\theta_{s}$ ), the cylinder surface pressure is assumed constant and equal to the potential flow value at $\theta=\theta_{s}$. This is shown graphically in following figure, where $C_{p}=\left(p-p_{\infty}\right) /\left(\frac{1}{2} \rho U_{\infty}^{2}\right)$.
+
+![](https://lambda-feedback-prod-frontend-client-bucket.s3.eu-west-2.amazonaws.com/97c443aa-a1ad-494e-9277-54bcaa258dc3/ad69050e-0a87-49d7-a10f-8a746a213388.png){ width=60% }
+"""  # raw string: LaTeX backslashes (\rho, \times, \frac, ...) must not be read as escape sequences
+part_markdown = r"""
+Taking $\theta_{s}=99^{\circ}$ (i.e. separation $81^{\circ}$ from the front stagnation point), calculate the variation of $C_{p}$ on the surface.
+
+When $0 < \theta \leq \theta_s$:
+"""  # raw for the same reason (\theta would otherwise contain a tab character)
+pre_response_text = """
+$C_p=$
+"""
+student_answer = """
+1-4 sin( theta)^2
+"""
+post_response_text = """
+"""
+correct_answer = """
+1-4sin(thetas)^2
+"""
+print(eval_with_feedback(question_markdown, part_markdown, pre_response_text, student_answer, post_response_text, correct_answer))  # prints the returned feedback sentence
diff --git a/app/extract_parameter.py b/app/extract_parameter.py
@@ -30,6 +30,7 @@ def extract_parameter(question_txt: str) -> str:
     The conditions are mainly classified into the following two types:
     1) Conditions that define the properties of a constant (e.g., "x is a real number", "y is a complex number", "x > 0", etc.)
     2) Conditions that define the types of variables (e.g., "y is a function of x", "f is a matrix", "u is a vector", etc.)
+    3) Conditions that define the domain of variables (e.g., "x is in (0,1)", "y is larger than or equal to 2", etc.)
     These may be combined together (e.g., "y is a real-valued function of x", "A is a positive definite matrix", etc.)
     
     The output format is as follows:
@@ -55,13 +56,21 @@ def extract_parameter(question_txt: str) -> str:
     Example:
         "y is a function of x" → ["y(x)"]
         "f is a function of x and z" → ["f(x,z)"]
+    
+    For type 3) conditions, output in the format:
+        "domain"="(a, b)", "(a,b]", "[a, b)" or "[a,b]" depending on whether the endpoints are included or not.
+    Example:
+        "x is in (0,1)" → "(0,1)"
+        "y is larger than or equal to 2 and less than 5" → "[2,5)"
+        "z is between -1 and 1, inclusive" → "[-1,1]"
 
-    If both types are present, include both in the output dictionary.
+    If more than one type is present, include both/all in the output dictionary.
 
     Return the result strictly as a JSON-like Python dictionary with keys:
     {{
         "symbol_assumptions"={{...}},
-        "function"=[...]
+        "function"=[...],
+        "domain"="..."
     }}
 
     Do not include explanations, output only the dictionary.
diff --git a/app/parameter.py b/app/parameter.py
diff --git a/app/re_conversion.py b/app/re_conversion.py