import codecs

from PyPDF2 import PdfReader


def createTestFile(link, output_path='output.txt'):
    """Write the extracted text of every page of a PDF to a UTF-8 text file.

    The text of each page is written in page order with no separator added
    between pages. An existing file at ``output_path`` is overwritten.

    Args:
        link: Location of the PDF, passed unchanged to ``PdfReader``.
        output_path: Path of the text file to write. Defaults to
            ``'output.txt'``, the location this function always used.
    """
    # Build the reader before touching the output file so that a PDF that
    # cannot be opened does not truncate an existing output file.
    reader = PdfReader(link)

    # The context manager closes (and thereby flushes) the file even if
    # text extraction raises part-way through the document.
    with codecs.open(output_path, 'w', encoding='utf-8') as f:
        for page in reader.pages:
            f.write(page.extract_text())


createTestFile("trainingsdata/data1.pdf")