import json import gensim from article.models import ArticleModel
# reslist: list of result rows; the first element of each row is an article id.
def getTheme(reslist):
    """Build a gensim dictionary and bag-of-words corpus for the given articles.

    For every row in ``reslist`` the matching ``ArticleModel`` is loaded by
    id, its ``file_fence`` field is decoded from JSON, and the decoded value
    is collected as one document.  A ``gensim.corpora.Dictionary`` is built
    from all documents and each document is then converted to its
    bag-of-words form.

    Args:
        reslist: Iterable of indexable rows; ``row[0]`` is used as the
            ``ArticleModel`` id.

    Returns:
        None.  NOTE(review): ``dictionary`` and ``corpus`` are built but not
        returned or stored yet -- confirm what callers are meant to receive.
    """
    token_lists = []
    for res in reslist:
        article_id = res[0]
        article = ArticleModel.objects.get(id=article_id)
        # Assumes file_fence holds a JSON-encoded list of tokens for the
        # article -- TODO confirm against the model definition.
        token_lists.append(json.loads(article.file_fence))

    # Maps every distinct token seen in the documents to an integer id.
    dictionary = gensim.corpora.Dictionary(token_lists)

    # Document/term-frequency representation: one bag-of-words
    # [(token_id, count), ...] per document.  doc2bow() requires the document
    # as its argument; calling it with none raised TypeError.
    corpus = [dictionary.doc2bow(tokens) for tokens in token_lists]
#获得文章中与关键词有关的段落 def GetWrodContent(reslist,keylist): wordcontent = dict() for res in reslist: id = res[0] article = ArticleModel.objects.get(id = id) filepath = article.file_path with open(filepath,'r',encoding='utf-8') as f: text = f.read() textlist = re.split('\n',text)
showlist = [] for texts in textlist: for key in keylist: if key in texts: showlist.append(texts) break