# Summary creator

import json
import spacy
import summaryhelper
import tfidf
import key_words_technical
import killwords
import collectpersonaldata
import collectthirdparty


# This function creates a complete paragraph out of the pieces.
# This approach works better because more sentences give the NLP model more context.
def create_paragraph(cluster):
    result = ""
    for entry in cluster:
        if entry["tag"] == "P" or entry["tag"] == "LI":
            result += entry["text"]
    return result
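
# A minimal usage sketch, inferred from the keys this function reads; the
# concrete values below are illustrative assumptions, not real pipeline data:
#
#   cluster = [
#       {"tag": "H1", "text": "Privacy Policy ", "id": 7},
#       {"tag": "P", "text": "We collect your email address. ", "id": 7},
#       {"tag": "LI", "text": "We share data with partners. ", "id": 7},
#   ]
#   create_paragraph(cluster)
#   # -> "We collect your email address. We share data with partners. "
# Note that the "H1" entry is skipped: only "P" and "LI" blocks are joined.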


# This function is the entry point into this module.
def run(liste, nlp):
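    # Hedged summary of the interface, inferred from how run() is used below
    # and at the bottom of this file (not an authoritative spec):
    #   liste -- a list of clusters, each cluster a list of tagged text blocks
    #   nlp   -- a loaded spaCy language pipeline
    # Returns a list holding the collected personal-data and third-party results.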

    # Debug scaffolding kept from development:
    # with open('test.txt') as json_file:
    #     data = json.load(json_file)
    # liste = data
    # print(liste)

    # contains the result of every step
    result_list = []

    # indexdict contains one tf*idf measure per paragraph.
    # This measure is used because spaCy does not always classify correctly,
    # so it lets us drop the paragraphs that have no value for us.
    indexdict = []
    for cluster in liste:
        paragraph = create_paragraph(cluster)
        indexdict.append({"average": tfidf.run(paragraph, nlp), "text": paragraph, "id": cluster[0]["id"], "cluster": cluster})
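
    # A hedged sketch of the dropping step the comment above refers to; the
    # threshold is a made-up assumption, and the actual filtering is done by
    # the downstream collect* modules rather than here:
    #
    #   MIN_AVERAGE = 0.1  # hypothetical cutoff, not taken from this project
    #   indexdict = [e for e in indexdict if e["average"] >= MIN_AVERAGE]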

    # Print function for debugging:
    # for tf in indexdict:
    #     print(tf["average"])

    print(len(indexdict))

    # indexdict_save is used to keep the original tf*idf list around.
    # A plain assignment would only alias the list, so take a shallow copy
    # (the dict entries themselves are still shared).
    indexdict_save = list(indexdict)

    # this step creates the list with the data that will be saved
    # list_save_saved = create_saved_list(indexdict_save, nlp)

    # this step updates the tf*idf list in combination with third parties
    # list_third_party = create_third(indexdict_save, nlp)
    collected_data = collectpersonaldata.run(indexdict_save, nlp)
    collected_third = collectthirdparty.run(indexdict_save, nlp)
    # for col in collected_data:
    #     print(col["text"])
    result_list.append(collected_data)
    result_list.append(collected_third)
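
    # result_list is now [collected_data, collected_third]; the exact shape of
    # each element is defined by the collectpersonaldata and collectthirdparty
    # modules, which are not part of this file.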

    return result_list


# Stand-alone entry point for manual testing: load the clustered input from a
# JSON file and run the whole pipeline (requires the spaCy model, e.g. via
# `python -m spacy download en_core_web_sm`).
if __name__ == "__main__":
    with open('test.txt') as json_file:
        data = json.load(json_file)
    nlp = spacy.load("en_core_web_sm")
    print(run(data, nlp))