-
Notifications
You must be signed in to change notification settings - Fork 0
/
parserv6.py
380 lines (353 loc) · 13.5 KB
/
parserv6.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
# -*- coding: utf-8 -*-
print("**Python Markdown parser - md-pypage - converts multiple Markdown files to Website**")
print("**Written by Lars Müller alias LMD or appgurueu in Python 3.5 - requires Python >= 3.2**")
#from cgi import html
from xml.sax.saxutils import escape, unescape
import urllib.parse as parse
from os import walk
from os.path import splitext, join
import math
# Extra replacements handed to xml.sax.saxutils.escape, which on its own only
# replaces &, < and >. Quotes are mapped to entities as well so that escaped
# text can also be placed inside HTML attribute values.
html_escape_table = {
    '"': "&quot;",
    "'": "&apos;"
}


def html_escape(text):
    """Return text with &, <, >, double and single quotes replaced by HTML entities."""
    return escape(text, html_escape_table)
def img(alt,url,title=""):
    """Build the HTML for an image followed by its caption.

    alt   -- alternative text, placed in the alt attribute
    url   -- image address, placed in the src attribute
    title -- caption including its surrounding quote characters (the first
             and last character are stripped); may be empty

    Returns an <img class="img-fluid"> tag followed by a
    <div class="caption"> holding the caption parsed as Markdown.
    """
    # & < > and the double quote must not end the attribute value early.
    attribute_entities={'"': "&quot;"}
    caption=title[1:-1]
    # An empty caption is not handed to parse_markdown: parsing "" would
    # emit a <br> and close any list/blockquote that is currently open.
    caption_html=parse_markdown(caption) if caption else ""
    return ('<img class="img-fluid" src="'+escape(url,attribute_entities)
            +'" alt="'+escape(alt,attribute_entities)
            +'"><div class="caption">'+caption_html+'</div>')
def parse_markdown(string,parent=False,parent_quote=False,subquote=0): # PARSES A SINGLE LINE !
    """Convert one line of Markdown into an HTML fragment.

    The line is checked, in this order, for: a "*" list item, a "> "
    blockquote, an empty line, a horizontal rule, a hard line break (two
    trailing spaces), a "#" header and finally inline markup (`code`,
    **bold**, __italic__, <autolink>, [text](url), ![alt](url "title")).

    Block state is kept in module globals that the caller must initialise:
    liste (current list depth), quote (current blockquote depth), ID (header
    counter) and headers (list of (header html, id string) tuples).

    string       -- the line, without its trailing newline
    parent       -- True when parsing the inner text of another construct;
                    disables list detection and list closing
    parent_quote -- True when parsing the text of a blockquote line;
                    disables closing of open blockquotes
    subquote     -- unused, kept for compatibility with existing callers

    Returns the HTML for the line. When both parent and parent_quote are
    true the inline result is returned bare, otherwise it is wrapped in <p>
    and preceded by any list/blockquote tags that had to be opened or closed.
    """
    global liste
    global ID
    global headers
    global quote
    prefix=""

    # LISTS: a "*" preceded only by spaces and not followed by another "*".
    # The slice keeps a "*" at the very end of the line from raising IndexError.
    star=string.find("*")
    if star != -1 and string[:star].count(" ") == star and not parent and string[star+1:star+2] != "*":
        prevliste=liste
        liste=1+star//3 # three spaces of indentation per nesting level
        if liste > prevliste:
            prefix+="<ul>"*(liste-prevliste)
        elif liste < prevliste:
            prefix+="</ul>"*(prevliste-liste)
        return prefix+"<li>"+parse_markdown(string[star+2:],parent=True)+"</li>"

    # An empty top-level line ends every open list.
    if not parent and liste != 0 and string=="":
        prefix+="</ul>"*liste
        liste=0

    # BLOCKQUOTES: count the leading "> " markers and adjust the nesting.
    if len(string) > 2 and string[0:2]=="> ":
        curquote=1
        string=string[2:]
        while len(string) > 2 and string[0:2]=="> ":
            string=string[2:]
            curquote+=1
        # At most one of these adds anything; a negative count yields "".
        prefix+='<blockquote class="blockquote">'*(curquote-quote)
        prefix+='</blockquote>'*(quote-curquote)
        quote=curquote
        return prefix+parse_markdown(string,parent_quote=True)

    # A line that is not part of a blockquote closes all open ones.
    if not parent_quote and quote != 0:
        prefix+="</blockquote>"*quote
        quote=0

    if len(string) == 0:
        return prefix+"<br>"

    # HORIZONTAL RULE: more than three identical "*", "-" or "_" characters.
    if len(string) > 3 and (string[0] == "*" or string[0] == "-" or string[0] == "_") and string.count(string[0])==len(string):
        return prefix+"<hr>"

    # HARD LINE BREAK: two trailing spaces. parent_quote is passed on so
    # that a quoted line does not close its own blockquote.
    if string[-2:]=="  ":
        return prefix+parse_markdown(string[:-2],parent=True,parent_quote=parent_quote)+"<br>"

    # HEADERS
    if string[0]=="#":
        space=string.find(" ")
        if space==-1 or string[space+1:].count(" ")==len(string)-space-1:
            # No header text at all; keep the pending closing tags.
            return prefix+"<br>"
        c=string[0:space-1].count("#")
        if space-1==c: # everything in front of the space is "#"
            ID+=1
            c+=1
            level=str(c)
            inner=parse_markdown(string[space+1:],parent=True)
            # headers stores the tag without an id; the returned tag gets one.
            headers.append(("<h"+level+">"+inner+"</h"+level+">",str(ID)))
            return prefix+"<h"+level+' id="gheader'+str(ID)+'">'+inner+"</h"+level+">"

    # INLINE MARKUP: split the line into (text, kind[, url]) tags.
    # Delimiters are only honoured in pairs, so an odd one stays literal.
    bold=False
    boldamount=string.count("**")//2*2
    ba=0
    italic=False
    italicamount=string.count("__")//2*2
    ia=0
    code=False
    codeamount=string.count("`")//2*2
    ca=0
    link=False
    startindex=0 # index of the delimiter that opened the current span
    tags=[]
    currentstring=""
    index=-1
    while index < len(string)-1:
        index+=1
        appendtag=False
        c=string[index]
        if c == "`":
            if ca < codeamount:
                code=not code
                ca+=1
                if not code: # We have just closed a code fragment
                    tags.append((string[startindex+1:index],"code"))
                    continue
                # A new one starts : SAVE INDEX + SAVE CURRENT STRING !
                appendtag=True
        elif not code:
            if c == "*" and string[index+1:index+2] == "*" and ba < boldamount and not italic:
                index+=1
                bold=not bold
                ba+=1
                if not bold: # We have just closed a bold fragment
                    tags.append((string[startindex+1:index-1],"bold"))
                    continue
                appendtag=True
            elif c == "_" and string[index+1:index+2] == "_" and ia < italicamount and not bold:
                index+=1
                italic=not italic
                ia+=1
                if not italic: # We have just closed an italic fragment
                    tags.append((string[startindex+1:index-1],"italic"))
                    continue
                appendtag=True
            elif c == "<" and not link:
                appendtag=True
                link=True
            elif c == ">" and link:
                link=False
                tags.append((string[startindex+1:index],"link"))
                continue
            elif c == "!" and string[index+1:index+2] == "[":
                # IMAGE: ![alt](url "title")
                if len(currentstring) > 0:
                    tags.append((currentstring,"normal"))
                    currentstring=""
                breakit=False
                index+=1 # step onto the "["
                for i in range(index+2,len(string)-3):
                    if string[i] == "]":
                        text=string[index+1:i]
                        if string[i+1]=="(":
                            inside=False # True while inside the quoted title
                            for j in range(i+3,len(string)):
                                c3=string[j]
                                if c3=="\"":
                                    inside=not inside
                                if inside:
                                    continue
                                if c3 == ")":
                                    breakit=True
                                    imglink=string[i+2:j].split(" ",1)
                                    # An image without a title gets an empty quoted one.
                                    title=imglink[1] if len(imglink) > 1 else '""'
                                    tags.append((img(text,imglink[0],title),"image"))
                                    index=j
                                    break
                        break # only the first "]" is considered
                if breakit:
                    continue
            elif c == "[":
                # LINK: [text](url)
                if len(currentstring) > 0:
                    tags.append((currentstring,"normal"))
                    currentstring=""
                breakit=False
                for i in range(index+2,len(string)-3):
                    if string[i] == "]":
                        text=string[index+1:i]
                        if string[i+1]=="(":
                            for j in range(i+3,len(string)):
                                if string[j] == ")":
                                    breakit=True
                                    tags.append((text,"link",string[i+2:j]))
                                    index=j
                                    break
                        break # only the first "]" is considered
                if breakit:
                    continue
        if appendtag:
            # A span was opened: flush the plain text collected so far.
            tags.append((currentstring,"normal"))
            currentstring=""
            startindex=index
            continue
        if not bold and not italic and not code and not link:
            currentstring+=c
    if len(currentstring) != 0:
        tags.append((currentstring,"normal"))

    # Render the collected tags.
    result=""
    for tag in tags:
        text=tag[0]
        kind=tag[1]
        p=""
        s=""
        wrap=False # True when the content is parsed again for nested markup
        if kind=="code":
            p,s="<code>","</code>"
        elif kind=="bold":
            p,s="<b>","</b>"
            wrap=True
        elif kind=="italic":
            p,s="<em>","</em>"
            wrap=True
        elif kind=="link":
            if len(tag) == 2:
                # <autolink>: only turned into an anchor when it starts with http
                if text[0:4] == "http": # CHECK LINKS !
                    p,s='<a href="'+text+'">',"</a>"
            else:
                p,s='<a href="'+tag[2]+'">',"</a>"
        elif kind=="image":
            result+=text # already finished HTML
            continue
        if wrap:
            result+=p+parse_markdown(text,parent_quote=True,parent=True)+s
        else:
            result+=p+html_escape(text)+s
    if parent_quote and parent:
        return result
    return prefix+"<p>"+result+"</p>"
def parse_md(string): # Parse line by line
    """Convert a multi-line Markdown text to HTML.

    Underlined headers ("===" / "---" below a line of similar length) are
    first rewritten to "# " / "## " headers. Lines indented by four spaces
    are then collected into <pre><code> blocks while no list is open; every
    other line is converted with parse_markdown.

    Prints the number of indented code segments found and returns the HTML.
    """
    lines=string.split("\n")
    # Convert alternate header writings (underlines). Walking backwards lets
    # each underline be blanked after the line above it was rewritten.
    for i in range(len(lines)-1,0,-1):
        underline=lines[i]
        if len(underline) > 0 and abs(len(lines[i-1])-len(underline)) < 3:
            if underline.count("=")==len(underline):
                lines[i]=""
                lines[i-1]="# "+lines[i-1]
            elif underline.count("-")==len(underline):
                lines[i]=""
                lines[i-1]="## "+lines[i-1]
    pieces=[]
    ident=False # True while inside an indented code block
    segments=0
    for line in lines:
        prefix=""
        asteriskpos=line.find("*")
        # liste is read again for every line because parse_markdown changes it.
        if liste== 0 and ((len(line) > 4 and line[0:4]==" "*4 and (asteriskpos==-1 or asteriskpos > 1 or line[0:asteriskpos].count(" ") != asteriskpos))):
            if not ident:
                prefix="<pre><code>"
                ident=True
        elif ident:
            ident=False
            prefix="</code></pre>"
            segments+=1
        if ident:
            lval=html_escape(line[4:])+"\n"
        else:
            lval=parse_markdown(line)
        pieces.append(prefix+lval)
    if ident:
        # A code block that runs to the end of the text is closed here and
        # counted like any other segment.
        segments+=1
        pieces.append("</code></pre>")
    print("**Found "+str(segments)+" multi-line code segments.**")
    return "".join(pieces)
def code(): # Parse multi-line code fragments
    """Convert the global markdown text to HTML in place.

    The text is split at GitHub style ``` fences. Parts outside the fences
    are converted with parse_md; parts inside are HTML-escaped and wrapped
    in <pre><code>. A fence that is never closed turns the rest of the text
    into code.
    """
    global markdown
    converted=[]
    # Splitting at the fence yields text, code, text, ... so every part with
    # an odd position lies between an opening and a closing fence.
    for position,segment in enumerate(markdown.split("`"*3)):
        if position % 2:
            # Escaped like the indented code blocks handled in parse_md.
            converted.append("<pre><code>"+html_escape(segment)+"</code></pre>")
        else:
            converted.append(parse_md(segment))
    markdown="".join(converted)
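# This Python script converts the Markdown files of a directory to HTML pages, plus adding some bookmarks, navigation & css
# NOTE: an earlier description said it grabs the newest lua_api.txt from the Minetest GitHub repo; nothing in this file downloads it.
# So mainly MD -> HTML. Written by me to improve my rusty Python skills.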
# © Lars Müller @appguru.eu
# Convert every file below the chosen directory. The first line of a file is
# expected to look like "title:summary"; the rest is the Markdown body.
directory=input("Directory name : ")
html_files={} # file name without extension -> (title, summary, html, header navigation)
for root,_subdirs,files in walk(directory):
    for file in files:
        # walk reports file names relative to root, which differs from
        # directory for files in subdirectories.
        with open(join(root,file), 'r') as source:
            final = source.read()
        # partition never fails: a missing newline or colon gives "".
        first_line,_,body=final.partition("\n")
        title,_,summary=first_line.partition(":")
        print("**Converting "+file+"...**")
        markdown=body
        liste=0 # Which sublist we are in right NOW
        quote=0 # Which blockquote we are in right NOW
        headers=[] # Stores all the headers + IDs
        ID=0 # Stores header ID counter
        print("**Starting parsing...**")
        code() # replaces the global markdown with the generated HTML
        print("**...finished parsing.**")
        print("**Creating content table...**")
        # Create navbar: one entry per header, linking to its gheader id
        nav="".join("""<li><a class="nav-link" href="#gheader"""+header[1]+"""">"""+header[0]+"""</a></li>""" for header in headers)
        print("**...finished creating content table. "+str(len(headers))+" Headers are included.**")
        html_files[splitext(file)[0]]=(title,summary,markdown,nav)
# Insert every converted file into the page template and save it in the
# "<directory>_page" folder.
print("**Reading template...**")
with open('template.html', 'r') as template_file:
    template = template_file.read()
from os import mkdir
from os.path import isdir
output_directory=directory+"_page"
if isdir(output_directory):
    print("**Directory already exists.**")
else:
    # Any failure here (e.g. missing permissions) is reported as it is
    # instead of being mistaken for an existing directory.
    mkdir(output_directory)
for key,val in html_files.items():
    print("**Inserting "+key+" into template file...**")
    string=template.replace("<!--PLACETITLE-->",val[0])
    string=string.replace("<!--PLACESTUFF-->",val[2])
    string=string.replace("<!--PLACENAV-->",val[3])
    # Create navbar: links to all pages, the current one marked as active
    navi='<a class="nav-link" href="index.html">Home</a>'
    for key2,val2 in html_files.items():
        if key != key2:
            navi+='<a class="nav-link" href="'+key2+'.html">'+val2[0]+'</a>'
        else:
            navi+='<a class="nav-link active" id="v-pills-home-tab" data-toggle="pill" href="'+key2+'.html" role="tab" aria-controls="v-pills-home" aria-selected="true">'+val2[0]+'</a>'
    string=string.replace("<!--PLACENAV2-->",navi)
    with open(join(output_directory,key+'.html'), 'w') as file: # SAVE AS <key>.html
        file.write(string)
    print("**...saved.**")
# Build the index page: one preview box (title link + summary) per page.
preview=""
for key2,val2 in html_files.items():
    preview+='<div class="col-sm-4"><div class="container" id="index"><h2><a href="'+key2+'.html">'+val2[0]+""" »</a></h2>
<p>"""+val2[1]+"""
</p>
</div></div>"""
print("**Reading preview template...**")
with open('index_template.html', 'r') as template_file:
    template = template_file.read()
string=template.replace("<!--PLACESTUFF-->",preview)
with open(join(directory+"_page",'index.html'), 'w') as file: # SAVE
    file.write(string)
print("**Preview saved.**")
# Copy the stylesheet next to the generated pages.
from shutil import copyfile,rmtree
from os.path import isdir
stylesheet_target=join(directory+"_page","jumbotron.css")
# copyfile replaces an existing file by itself; only a directory of that
# name is in the way and has to be removed first.
if isdir(stylesheet_target):
    rmtree(stylesheet_target)
    print("**Stylesheets already exist.**")
copyfile("jumbotron.css",stylesheet_target)