Browse Source

fix: unsuited segmentation

master
Dnomd343 2 years ago
parent
commit
b768f58c19
  1. 2
      src/ixsw.la/extract.py

2
src/ixsw.la/extract.py

@@ -23,7 +23,7 @@ def splitHtml(rawHtml: str) -> dict: # extract from raw html content
 div = '\n'.join(div.prettify().split('\n')[1: -2])
 return {
 'title': '%s %s' % (title[1], title[2].strip()),
-'content': [x.strip() for x in div.split('<br/>\n <br/>\n')]
+'content': [x.strip() for x in div.split('\n <br/>\n <br/>') if x.strip() != '']
 }

Loading…
Cancel
Save