You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
14 lines
361 B
14 lines
361 B
from PyPDF2 import PdfReader |
|
import codecs |
|
|
|
def createTestFile(link):
    """Extract the text of every page of a PDF into ``output.txt``.

    Args:
        link: The PDF source, handed unchanged to ``PdfReader`` (the call
            site in this file passes a path string).

    Side effects:
        Creates or truncates ``output.txt`` in the current working
        directory and writes the extracted text of each page, in page
        order, encoded as UTF-8. No separator is inserted between pages.
        A PDF with no pages yields an empty output file.
    """
    reader = PdfReader(link)

    # The context manager guarantees the file is flushed and closed even if
    # text extraction fails part-way through. newline="" disables newline
    # translation so the bytes written match what the extracted text holds.
    with open("output.txt", "w", encoding="utf-8", newline="") as f:
        for page in reader.pages:
            f.write(page.extract_text())
|
|
|
|
|
# Script entry: dump the text of the bundled training PDF to output.txt.
createTestFile("trainingsdata/data1.pdf")