@@ -71,6 +71,162 @@ def __init__(self, config):
7171 f"summary: { self ._summary_llm .get_model_name ()} "
7272 )
7373
74+ # -- Two-pass summarization helpers ----------------------------------
75+
@staticmethod
def _format_sections_for_prompt(
    sections: Dict[str, str], max_chars: int = 60000
) -> str:
    """Format paper sections into one string within a character budget.

    Sections are emitted in priority order: names containing a
    methodology-related keyword first, unclassified names next, and
    names containing a low-priority keyword (acknowledgments, references,
    appendix, ...) last. Ties are broken alphabetically by section name.
    Low-priority keywords are checked first, so a name matching both
    lists is treated as low priority.

    Each emitted section is a header line followed by the section text.
    Once the budget is nearly used up, the section being written is cut
    to the remaining room and every later section is dropped; because of
    the ordering, that is normally the low-priority material.

    Args:
        sections: Mapping of section name to section text. ``None`` or an
            empty mapping yields an empty string.
        max_chars: Maximum length of the returned string, in characters
            (not tokens).

    Returns:
        The concatenated sections, or ``""`` when there is no usable
        text or the budget cannot hold even one header.
    """
    HIGH_PRIORITY_KEYWORDS = {
        "method", "model", "strategy", "trading", "signal", "algorithm",
        "approach", "result", "experiment", "implementation", "data",
        "feature", "regression", "portfolio", "backtest", "return",
        "risk", "parameter", "calibration", "estimation", "framework",
    }
    LOW_PRIORITY_KEYWORDS = {
        "acknowledg", "reference", "bibliography", "appendix", "vita",
        "disclosure", "funding", "supplementar",
    }

    def _priority(name: str) -> int:
        # Substring match on the lower-cased name; lower rank sorts first.
        lower = name.lower()
        if any(kw in lower for kw in LOW_PRIORITY_KEYWORDS):
            return 2
        if any(kw in lower for kw in HIGH_PRIORITY_KEYWORDS):
            return 0
        return 1

    if not sections:
        return ""

    ordered = sorted(sections.items(), key=lambda kv: (_priority(kv[0]), kv[0]))

    parts: list[str] = []
    total = 0
    for name, text in ordered:
        # Skip sections without real text: a bare header would make the
        # result look non-empty to callers that test `formatted.strip()`
        # and would spend budget on nothing.
        if not text or not text.strip():
            continue
        header = f"\n ### {name} \n "
        available = max_chars - total - len(header)
        if available <= 0:
            break
        chunk = text[:available]
        parts.append(header + chunk)
        total += len(header) + len(chunk)

    return "".join(parts)
118+
def extract_key_passages(self, sections: Dict[str, str]) -> Optional[str]:
    """Pass 1 — Extractive: quote verbatim passages relevant to implementation.

    Formats the paper sections into a single prompt and asks the summary
    LLM to return a numbered list of verbatim quotes, each tagged with
    its section name.

    Args:
        sections: Mapping of section name to section text.

    Returns:
        The LLM's response text, or ``None`` when there is nothing to
        send, the call fails, or the response is empty.
    """
    self.logger.info("Two-pass pipeline — Pass 1 (extract key passages)")

    # Guard before formatting so a missing/empty mapping is reported as
    # "nothing to send" instead of raising inside the formatter.
    if not sections:
        self.logger.warning("No section text to send to LLM")
        return None

    formatted = self._format_sections_for_prompt(sections)
    if not formatted.strip():
        self.logger.warning("No section text to send to LLM")
        return None

    system = (
        "You are a quantitative finance research analyst. "
        "Read the paper sections below and QUOTE VERBATIM every passage that "
        "is relevant to implementing the described trading strategy or model. "
        "Do NOT paraphrase — copy the exact text. Do NOT skip passages because "
        "they use unfamiliar terms, novel formulas, or custom model names.\n \n "
        "Focus on:\n "
        "- Mathematical formulas and equations\n "
        "- Parameter values and calibration details\n "
        "- Entry and exit rules or signal generation logic\n "
        "- Risk controls, stop-loss rules, position sizing\n "
        "- Novel indicators or custom models (e.g., OU process, HMM, "
        "regime-switching, proprietary scores)\n "
        "- Universe selection and data requirements\n "
        "- Execution details (order types, rebalancing frequency)\n \n "
        "Output format: a numbered list of verbatim quotes, each preceded by "
        "the section name in brackets. Example:\n "
        "[Methodology] \" We define the signal as ...\" \n "
    )

    prompt = f"Extract all implementable passages from this paper:\n {formatted} "

    try:
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]
        # NOTE(review): `_run_async` is defined elsewhere in this module;
        # presumably it drives the `chat` coroutine to completion and
        # returns its result as a string — confirm.
        result = _run_async(
            self._summary_llm.chat(
                messages=messages, max_tokens=4096, temperature=0.1
            )
        )
        # An empty or missing response is a failed pass, not a result:
        # report it explicitly rather than via a len(None) TypeError.
        if not result or (isinstance(result, str) and not result.strip()):
            self.logger.warning("Pass 1 returned an empty response")
            return None
        self.logger.info(
            f"Pass 1 complete — extracted {len(result)} chars of passages"
        )
        return result
    except Exception as e:
        # Best-effort boundary: keep returning None, but log the traceback
        # so the underlying failure can be diagnosed.
        self.logger.error(
            f"Pass 1 (extract_key_passages) failed: {e} ", exc_info=True
        )
        return None
167+
def interpret_strategy(self, extractions: str) -> Optional[str]:
    """Pass 2 — Interpretive: convert verbatim quotes into strategy spec.

    Sends the Pass 1 quotes to the summary LLM with instructions to
    produce a structured strategy specification (overview, model,
    signals, exits, risk, universe, execution, parameter table) based
    only on the quoted text.

    Args:
        extractions: Verbatim quotes produced by Pass 1.

    Returns:
        The LLM's strategy specification, or ``None`` when the input is
        empty or not a string, the call fails, or the response is empty.
    """
    self.logger.info("Two-pass pipeline — Pass 2 (interpret strategy)")

    # The isinstance check keeps a non-string input from raising on
    # `.strip()` outside the try block.
    if (
        not extractions
        or not isinstance(extractions, str)
        or not extractions.strip()
    ):
        self.logger.warning("Empty extractions — nothing to interpret")
        return None

    system = (
        "You are a quantitative trading strategist. Convert the verbatim "
        "paper quotes below into a precise, implementable strategy "
        "specification. Base your output ONLY on what the quotes say. "
        "If the paper uses an OU process, specify an OU process — do NOT "
        "substitute RSI or SMA. If the paper describes a proprietary model "
        "or custom indicator, describe it faithfully.\n \n "
        "Use the following flexible structure. Skip any section that is "
        "genuinely irrelevant to this particular strategy:\n \n "
        "## STRATEGY OVERVIEW\n "
        "One paragraph summarizing the core idea.\n \n "
        "## MATHEMATICAL MODEL\n "
        "Formulas, distributions, state dynamics — as described in the paper.\n \n "
        "## SIGNAL GENERATION\n "
        "Exact entry/exit conditions with numeric thresholds.\n \n "
        "## EXIT RULES\n "
        "Stop loss, profit target, time stop, trailing stop — with exact values.\n \n "
        "## RISK MANAGEMENT\n "
        "Position sizing, max exposure, drawdown limits.\n \n "
        "## UNIVERSE / STOCK SELECTION\n "
        "Market, filters, number of instruments.\n \n "
        "## EXECUTION DETAILS\n "
        "Order types, rebalancing frequency, data resolution.\n \n "
        "## PARAMETER TABLE\n "
        "| Parameter | Value | Source |\n "
        "|-----------|-------|--------|\n "
        "Every numeric parameter from the paper with source attribution.\n "
    )

    prompt = (
        "Convert these verbatim paper extractions into an implementable "
        f"strategy specification:\n \n {extractions} "
    )

    try:
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]
        # NOTE(review): `_run_async` is defined elsewhere in this module;
        # presumably it drives the `chat` coroutine to completion and
        # returns its result as a string — confirm.
        result = _run_async(
            self._summary_llm.chat(
                messages=messages, max_tokens=4096, temperature=0.3
            )
        )
        # An empty or missing response is a failed pass, not a spec:
        # report it explicitly rather than via a len(None) TypeError.
        if not result or (isinstance(result, str) and not result.strip()):
            self.logger.warning("Pass 2 returned an empty response")
            return None
        self.logger.info(
            f"Pass 2 complete — strategy spec is {len(result)} chars"
        )
        return result
    except Exception as e:
        # Best-effort boundary: keep returning None, but log the traceback
        # so the underlying failure can be diagnosed.
        self.logger.error(
            f"Pass 2 (interpret_strategy) failed: {e} ", exc_info=True
        )
        return None
227+
228+ # -- Legacy single-pass summary (kept intact) -------------------------
229+
74230 def generate_summary (self , extracted_data : Dict [str , List [str ]]) -> Optional [str ]:
75231 """Generate a structured trading strategy summary for algorithm generation."""
76232 self .logger .info ("Generating summary" )
0 commit comments