88 "log"
99 "net/url"
1010 "strings"
11+ stdhtml "html"
1112
1213 "golang.org/x/net/html"
1314
@@ -59,20 +60,30 @@ func isNextSiblingBr(node *html.Node) bool {
5960}
6061
6162func ParseNodesForPassage (node * html.Node ) string {
62- var text string
6363 var parts []string
6464
6565 for child := node .FirstChild ; child != nil ; child = child .NextSibling {
66- parts = append (parts , text )
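66+ // NOTE(review): meant to skip footnotes/cross-refs elements, but the `continue` below sits inside the attribute loop, so it only advances that inner loop and the element still reaches the switch; use a flag or a labeled continue on the outer loop.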
67+ if child .Type == html .ElementNode {
68+ for _ , attr := range child .Attr {
69+ if attr .Key == "class" {
70+ if strings .Contains (attr .Val , "footnotes" ) || strings .Contains (attr .Val , "cross-refs" ) {
71+ continue
72+ }
73+ }
74+ }
75+ }
6776
6877 switch tag := child .Data ; tag {
6978 case "span" :
79+ // Keep existing logic for span (likely poetry lines in legacy/scraped HTML)
7080 childText := ParseNodesForPassage (child )
7181 parts = append (parts , childText )
7282 if len (strings .TrimSpace (childText )) > 0 && ! isNextSiblingBr (child ) {
7383 parts = append (parts , "\n " )
7484 }
7585 case "sup" :
86+ // Handle superscripts (verse numbers/footnotes)
7687 isFootnote := func (node * html.Node ) bool {
7788 for _ , attr := range node .Attr {
7889 if attr .Key == "class" && attr .Val == "footnote" {
@@ -85,67 +96,62 @@ func ParseNodesForPassage(node *html.Node) string {
8596 break
8697 }
8798 childText := ParseNodesForPassage (child )
99+ // Use TelegramSuperscript for unicode conversion
88100 if len (childText ) > 0 {
89- parts = append (parts , fmt . Sprintf ( "^%s^" , childText ))
101+ parts = append (parts , platform . TelegramSuperscript ( childText ))
90102 }
91103 break
92104 case "p" :
93105 parts = append (parts , ParseNodesForPassage (child ))
94- break
95- case "b" :
96- parts = append (parts , platform .TelegramBold (ParseNodesForPassage (child )))
97- case "i" :
98- parts = append (parts , platform .TelegramItalics (ParseNodesForPassage (child )))
99- break
106+ parts = append (parts , "\n \n " )
107+ case "b" , "strong" :
108+ parts = append (parts , fmt .Sprintf ("<b>%s</b>" , ParseNodesForPassage (child )))
109+ case "i" , "em" :
110+ parts = append (parts , fmt .Sprintf ("<i>%s</i>" , ParseNodesForPassage (child )))
111+ case "h1" , "h2" , "h3" , "h4" , "h5" , "h6" :
112+ // Ignore "Footnotes" or "Cross references" headers
113+ headerText := ParseNodesForPassage (child )
114+ if headerText == "Footnotes" || headerText == "Cross references" {
115+ continue
116+ }
117+ parts = append (parts , fmt .Sprintf ("\n \n <b>%s</b>\n " , headerText ))
118+ case "ul" , "ol" :
119+ parts = append (parts , ParseNodesForPassage (child ))
120+ case "li" :
121+ parts = append (parts , fmt .Sprintf ("• %s\n " , ParseNodesForPassage (child )))
100122 case "br" :
101123 parts = append (parts , "\n " )
102- break
124+ case "div" :
125+ parts = append (parts , ParseNodesForPassage (child ))
103126 default :
104- parts = append (parts , child .Data )
127+ if child .Type == html .TextNode {
128+ parts = append (parts , stdhtml .EscapeString (child .Data ))
129+ } else if child .Type == html .ElementNode {
130+ // Recurse for unknown elements to preserve content
131+ parts = append (parts , ParseNodesForPassage (child ))
132+ }
105133 }
106134 }
107135
108- text = strings .Join (parts , "" )
109-
110- if node .Data == "h1" || node .Data == "h2" || node .Data == "h3" || node .Data == "h4" {
111- text = fmt .Sprintf ("*%s*" , text )
112- }
113- return text
136+ return strings .Join (parts , "" )
114137}
115138
116139func GetPassage (ref string , doc * html.Node , version string ) string {
117- filtNodes := utils .FilterTree (doc , func (child * html.Node ) bool {
118- switch tag := child .Data ; tag {
119- case "h1" :
120- fallthrough
121- case "h2" :
122- fallthrough
123- case "h3" :
124- fallthrough
125- case "h4" :
126- if child .FirstChild .Data == "Footnotes" || child .FirstChild .Data == "Cross references" {
127- return false
128- }
129- fallthrough
130- case "p" :
131- return true
132- }
133- return false
134- })
140+ // Replaced FilterTree with direct parsing of the root node
141+ // This allows handling arbitrary structure (divs, lists) returned by the API
135142
136- textBlocks := utils . MapNodeListToString ( filtNodes , ParseNodesForPassage )
143+ text := ParseNodesForPassage ( doc )
137144
138145 var passage strings.Builder
139146
140147 if len (ref ) > 0 {
141- refString := fmt .Sprintf ("_%s_ (%s)" , ref , version )
148+ // Use HTML formatting for reference
149+ refString := fmt .Sprintf ("<i>%s</i> (%s)" , ref , version )
142150 passage .WriteString (refString )
143151 }
144152
145- for _ , block := range textBlocks {
146- passage .WriteString ("\n " )
147- passage .WriteString (block )
148- }
153+ passage .WriteString ("\n " )
154+ passage .WriteString (strings .TrimSpace (text ))
149155
150156 return passage .String ()
151157}
@@ -158,6 +164,11 @@ func ParsePassageFromHtml(ref string, rawHtml string, version string) string {
158164 return rawHtml
159165 }
160166
167+ // html.Parse returns a full document tree (html -> head/body).
168+ // GetPassage hands the document root to ParseNodesForPassage, which
169+ // recurses through unknown elements and so reaches the body content.
170+ // NOTE(review): text inside <head> (e.g. <title>) would be emitted too;
171+ // consider locating <body> first if full documents are ever passed in.
161172 return strings .TrimSpace (GetPassage (ref , doc , version ))
162173}
163174
@@ -181,6 +192,7 @@ func GetBiblePassageFallback(env def.SessionData) def.SessionData {
181192
182193 // Attempt to get the passage
183194 env .Res .Message = GetPassage (ref , passageNode , config .Version )
195+ env .Res .ParseMode = def .TELEGRAM_PARSE_MODE_HTML
184196
185197 return env
186198}
@@ -224,6 +236,7 @@ func GetBiblePassage(env def.SessionData) def.SessionData {
224236
225237 if len (resp .Verse ) > 0 {
226238 env .Res .Message = ParsePassageFromHtml (env .Msg .Message , resp .Verse , config .Version )
239+ env .Res .ParseMode = def .TELEGRAM_PARSE_MODE_HTML
227240 return env
228241 }
229242 }
0 commit comments