Sunday, July 28, 2024

Extract pages from PDF using Python

# This code was suggested by Gemini and runs on Colab

!pip install PyPDF2==3.0.1

from PyPDF2 import PdfReader, PdfWriter

def extract_pages(input_pdf, output_pdf, pages):
    """Copy selected pages of one PDF into a newly written PDF file.

    Args:
        input_pdf: Source PDF, handed directly to ``PdfReader``.
        output_pdf: Path of the file to create; it is opened in binary
            write mode, so an existing file at that path is overwritten.
        pages: Iterable of indices into ``PdfReader.pages``. Pages are added
            to the output in the order the indices are given.
    """
    reader = PdfReader(input_pdf)
    result = PdfWriter()

    # Gather every requested page before the output file is opened.
    for index in pages:
        result.add_page(reader.pages[index])

    with open(output_pdf, 'wb') as destination:
        result.write(destination)

# Source PDF and the file that will receive the extracted pages.
input_pdf_file = '/content/sample_data/Emp Skills IX.pdf'
output_pdf_file = '/content/sample_data/Green IX.pdf'

# Page indices 175 through 205 inclusive (range() excludes its stop value).
# NOTE(review): the original note here read "+5, +6" under the two bounds —
# presumably the offsets added to the printed page numbers to get these
# values; confirm against the source PDF.
pages_to_extract = [*range(175, 206)]

extract_pages(
    input_pdf=input_pdf_file,
    output_pdf=output_pdf_file,
    pages=pages_to_extract,
)

===========================================================================

# This code was suggested by ChatGPT and had problems on IDLE

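# pip install PyPDF2   (the import below uses the PyPDF2 module; "pip install pypdf" installs the separate "pypdf" module instead)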

from PyPDF2 import PdfReader, PdfWriter


def extract_pages(inputFile, outputFile, pages):
    """Write the pages of ``inputFile`` selected by ``pages`` to ``outputFile``.

    Args:
        inputFile: Source PDF, passed as-is to ``PdfReader``.
        outputFile: Path of the PDF to create; opened with mode ``'wb'``.
        pages: Iterable of indices into ``PdfReader.pages`` (for example
            ``[0, 1]``); the output keeps the order of this iterable.
    """
    src = PdfReader(inputFile)
    dst = PdfWriter()

    for idx in pages:
        selected = src.pages[idx]
        dst.add_page(selected)

    # The destination is only opened once all pages have been collected.
    with open(outputFile, 'wb') as fh:
        dst.write(fh)


# Input PDF and the file that receives the extracted pages; both are
# relative paths, so they resolve against the current working directory.
innFile = 'US_Declaration.pdf'

outFile = 'US_Declaration1.pdf'

# Indices of the pages to copy: [0, 1].
# list(range(...)) replaces the element-by-element comprehension, which
# only copied the range without transforming it.
pages = list(range(2))

extract_pages(innFile, outFile, pages)


No comments:

Post a Comment