# Summary creator
import json
import spacy
import summaryhelper
import tfidf
import key_words_technical
import killwords
import collectpersonaldata
import collectthirdparty


def create_paragraph(cluster):
    """Join the text of all paragraph/list-item entries of a cluster into one string.

    Feeding the NLP pipeline a full paragraph (rather than individual fragments)
    gives it more context and therefore better results.

    :param cluster: list of dicts, each with at least "tag" and "text" keys
    :return: concatenated text of every entry tagged "P" or "LI"
    """
    # "".join is linear; repeated "+=" concatenation would be quadratic.
    return "".join(
        entry["text"] for entry in cluster if entry["tag"] in ("P", "LI")
    )


def run(liste, nlp):
    """Entry point of the summary-creation step.

    Builds one paragraph per cluster, scores it with tf*idf, then collects
    personal-data and third-party information from the scored paragraphs.

    :param liste: list of clusters; each cluster is a list of tagged entries
        (dicts with "tag", "text", and — on the first entry — "id")
    :param nlp: loaded spaCy language model, passed through to the helpers
    :return: two-element list [collected personal data, collected third parties]
    """
    # indexdict holds a tf*idf score per paragraph. The score is needed because
    # spaCy does not always classify correctly, so it is used downstream to drop
    # paragraphs that carry no value for us.
    indexdict = []
    for cluster in liste:
        paragraph = create_paragraph(cluster)
        indexdict.append({
            "average": tfidf.run(paragraph, nlp),
            "text": paragraph,
            "id": cluster[0]["id"],
            "cluster": cluster,
        })

    # Debug output: number of scored paragraphs.
    print(len(indexdict))

    # Keep a reference to the original tf*idf list; the collectors receive the
    # same underlying data.
    indexdict_save = indexdict

    collected_data = collectpersonaldata.run(indexdict_save, nlp)
    collected_third = collectthirdparty.run(indexdict_save, nlp)

    # One result entry per collection step, in fixed order.
    return [collected_data, collected_third]