You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
80 lines
2.1 KiB
80 lines
2.1 KiB
4 years ago
|
# Summary creator
|
||
|
|
||
|
import json
|
||
|
import spacy
|
||
|
import summaryhelper
|
||
|
import tfidf
|
||
|
import key_words_technical
|
||
|
import killwords
|
||
|
import collectpersonaldata
|
||
|
import collectthirdparty
|
||
|
|
||
|
# This function creates a complete paragraph out of the pieces.
# This approach is better because more sentences give better context for NLP.
def create_paragraph(cluster):
    """Concatenate the text of all paragraph and list-item entries.

    Parameters
    ----------
    cluster : iterable of dicts, each with at least "tag" and "text" keys.

    Returns
    -------
    str
        The concatenated text of every entry tagged "P" or "LI";
        an empty string when no entry matches.
    """
    # "".join avoids the quadratic cost of repeated += string concatenation.
    return "".join(
        entry["text"] for entry in cluster if entry["tag"] in ("P", "LI")
    )
|
||
|
|
||
|
|
||
|
|
||
|
# This function is the entry point into this module.
def run(liste, nlp):
    """Score each cluster with tf*idf and collect privacy-relevant data.

    Parameters
    ----------
    liste : list
        List of clusters; each cluster is a list of dicts with at least
        "tag", "text" and "id" keys (see create_paragraph).
    nlp : spacy.language.Language
        A loaded spaCy pipeline, passed through to the helper modules.

    Returns
    -------
    list
        Two entries: the personal-data results from
        ``collectpersonaldata.run`` and the third-party results from
        ``collectthirdparty.run`` — whatever those helpers return.
    """
    # indexdict contains one entry per cluster together with its tf*idf
    # measure.  The measure is used because spaCy does not always classify
    # correctly, so paragraphs that have no value for us can be dropped
    # based on this score.
    indexdict = []
    for cluster in liste:
        paragraph = create_paragraph(cluster)
        indexdict.append({
            "average": tfidf.run(paragraph, nlp),
            "text": paragraph,
            "id": cluster[0]["id"],
            "cluster": cluster,
        })

    # Keep a genuine copy of the original tf*idf list so the original is
    # preserved even if a downstream step mutates the list it receives.
    # (The previous code only aliased the list, contradicting its comment.)
    indexdict_save = list(indexdict)

    # Each step's result is collected, in order, into the returned list.
    collected_data = collectpersonaldata.run(indexdict_save, nlp)
    collected_third = collectthirdparty.run(indexdict_save, nlp)

    return [collected_data, collected_third]
|
||
|
|
||
|
#with open('test.txt') as json_file:
|
||
|
# data = json.load(json_file)
|
||
|
|
||
|
#nlp = spacy.load("en_core_web_sm")
|
||
|
#run(data, nlp)
|