This repository was archived by the owner on Mar 1, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsciknowmap.py
More file actions
70 lines (55 loc) · 2.25 KB
/
sciknowmap.py
File metadata and controls
70 lines (55 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import math
import operator
#
# Provides HTML code for a single topic signature based on greyscale coding
# for each word
#
def topic_signature_html(m, t_tuple, n_words, colormap, global_min=None, global_max=None):
    """Build the HTML signature for a single topic.

    The signature is a coloured marker with the topic id and its share,
    followed by the topic's heaviest words. Each word is shaded on a
    greyscale running from white (fraction 0) to black (fraction 1) and
    carries a "% relevant" tooltip.

    Args:
        m: Object exposing ``topics``; ``m.topics[topic_id]`` must be a
            mapping of word -> numeric weight.
        t_tuple: ``(topic_id, share)`` pair. The share is displayed rounded
            to two significant figures.
        n_words: Maximum number of words to include.
        colormap: Indexable by topic id; yields the colour string used for
            the topic marker.
        global_min: Optional lower bound for the tooltip percentage.
        global_max: Optional upper bound for the tooltip percentage. Both
            bounds must be supplied (``0`` is a valid bound) for them to
            take effect; otherwise the tooltip uses the per-topic scale.

    Returns:
        The HTML fragment as a string. If the topic has no words to show,
        only the marker/id/share header is returned.

    Raises:
        ValueError: If the smallest selected weight is negative (from
            ``math.sqrt``).
    """
    import html  # function-scope import keeps this block self-contained

    def unit_fraction(value, floor, span):
        # A zero span means there is nothing to scale against; treat the
        # value as maximal instead of dividing by zero.
        if span == 0:
            return 1.0
        # Clamp so callers always get a fraction that maps onto a valid
        # two-digit hex level and a 0-100 percentage.
        return min(1.0, max(0.0, float(value - floor) / span))

    def float_to_greyscale(f):
        # 0.0 -> 'ff' (white), 1.0 -> '00' (black); always two hex digits.
        level = '%02x' % (255 - int(f * 255))
        return '#%s%s%s' % (level, level, level)

    t_id = t_tuple[0]
    t_percent = t_tuple[1]
    color = colormap[t_id]

    t_percent_2sf = '%s' % float('%.2g' % t_percent)
    header = '<emph><font color="' + color + '">■ </font>#' + str(t_id) + ' (' + t_percent_2sf + '): </emph>'

    # Heaviest words first; this order is also the output order.
    word_weights = sorted(
        m.topics[t_id].items(), key=operator.itemgetter(1), reverse=True
    )[:n_words]
    if not word_weights:
        return header

    vals = [weight for _, weight in word_weights]
    val_max = max(vals)
    # The shading floor is sqrt(min / 2), not the minimum weight itself.
    # NOTE(review): presumably this keeps the lightest word from rendering
    # pure white - confirm. For weights below 0.5 the floor exceeds the
    # minimum weight, which is why fractions are clamped to [0, 1].
    val_min = math.sqrt(min(vals) / 2)
    val_diff = float(val_max - val_min)

    # Compare against None explicitly: a bound of 0 is legitimate.
    use_global = global_min is not None and global_max is not None
    if use_global:
        global_diff = float(global_max - global_min)

    parts = [header]
    for word, weight in word_weights:
        p = unit_fraction(weight, val_min, val_diff)
        q = unit_fraction(weight, global_min, global_diff) if use_global else p
        label = html.escape(word.replace('_', ' '), quote=False)
        parts.append('<span style="color:%s" title="%s%% relevant">%s</span>\n' % (
            float_to_greyscale(p), int(q * 100), label))
    return ''.join(parts)
def document_signature_html(corpus, doc_id, DT, m, doc_list, n_topics, n_words, colormap):
    """Build the HTML signature for one document.

    The result is a ``<p>`` element holding the document title (bold), its
    authors (italic, comma-separated) and one topic signature per line for
    the document's heaviest topics.

    Args:
        corpus: Indexable by the keys stored in ``doc_list``; each entry
            must expose ``title`` (str) and ``authors`` (iterable of str).
        doc_id: Row index into ``DT`` and position in ``doc_list``.
        DT: Indexable by ``doc_id``; ``DT[doc_id]`` is iterated as the
            per-topic weights of that document, in topic-id order.
        m: Model object passed through to ``topic_signature_html``.
        doc_list: Maps ``doc_id`` to the corresponding key in ``corpus``.
        n_topics: Maximum number of topics to show. Fewer are shown when
            the document row has fewer entries.
        n_words: Number of words per topic, passed through to
            ``topic_signature_html``.
        colormap: Topic colour lookup, passed through to
            ``topic_signature_html``.

    Returns:
        The HTML fragment as a string.
    """
    import html  # function-scope import keeps this block self-contained

    # (topic_id, weight) pairs, heaviest first, capped at n_topics.
    top_topics = sorted(
        enumerate(DT[doc_id]), key=operator.itemgetter(1), reverse=True
    )[:n_topics]

    doc = corpus[doc_list[doc_id]]
    # Title and authors are free text; escape them so characters such as
    # '&' or '<' cannot break or inject markup.
    title = html.escape(doc.title, quote=False)
    authors = ', '.join(html.escape(author, quote=False) for author in doc.authors)

    # Iterate the pairs actually available rather than range(n_topics), so
    # asking for more topics than exist does not raise IndexError.
    topic_lines = '</br>'.join(
        [topic_signature_html(m, topic, n_words, colormap) for topic in top_topics]
    )

    return '<p><b>' + title + '</b></br>' + '<i>' + authors + '</i>' + '</br>' + topic_lines + '</p>'