You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
14 lines
361 B
14 lines
361 B
3 years ago
|
from PyPDF2 import PdfReader
|
||
|
import codecs
|
||
|
|
||
|
def createTestFile(link):
    """Dump the extracted text of every page of a PDF into ``output.txt``.

    The PDF is opened with ``PdfReader`` and the result of
    ``extract_text()`` for each page is written, in page order and with no
    separator between pages, to ``output.txt`` (relative path, UTF-8,
    overwriting any existing file). A PDF with no pages produces an empty
    output file.

    Args:
        link: Passed unchanged to ``PdfReader``; the call site in this file
            supplies a path to a PDF file.

    Returns:
        None. The only result is the file written to disk.
    """
    reader = PdfReader(link)
    # The ``with`` block guarantees the output is flushed and closed even if
    # text extraction raises part-way through. ``codecs.open`` is kept (rather
    # than the builtin ``open``) so the bytes written stay exactly as before:
    # it performs no newline translation.
    with codecs.open('output.txt', 'w', encoding='utf-8') as f:
        for page in reader.pages:
            f.write(page.extract_text())
|
||
|
|
||
|
|
||
|
# Script entry: runs at import/execution time and converts the bundled
# training PDF into output.txt (path is relative to the working directory).
createTestFile("trainingsdata/data1.pdf")
|