pdf-reader

PDF reading and extraction

Safety Notice

This listing is imported from the skills.sh public index metadata. Review the upstream SKILL.md and the repository scripts before running anything.

Copy this and send it to your AI assistant so it can learn this skill

Install skill "pdf-reader" with this command: npx skills add thinkfleetai/thinkfleet-engine/thinkfleetai-thinkfleet-engine-pdf-reader

PDF Reader

Tools for reading, extracting, and manipulating PDF files.

Extract text from PDF

# Using pdftotext (poppler-utils) if available
pdftotext input.pdf output.txt

# Using python with PyPDF2
python3 -c "
from PyPDF2 import PdfReader

# Print the extracted text of every page under a 1-based page banner.
with open('input.pdf', 'rb') as pdf_file:
    for page_no, page in enumerate(PdfReader(pdf_file).pages, start=1):
        page_text = page.extract_text()
        print(f'--- Page {page_no} ---')
        print(page_text)
"

Extract tables from PDF

python3 -c "
import sys

# Dump every table tabula finds in input.pdf as CSV, one banner per table.
try:
    import tabula
    tables = tabula.read_pdf('input.pdf', pages='all')
    for table_no, frame in enumerate(tables, start=1):
        print(f'--- Table {table_no} ---')
        print(frame.to_csv(index=False))
except ImportError:
    # tabula could not be imported: print the install hint and exit non-zero.
    print('Install tabula-py: pip install tabula-py')
    sys.exit(1)
"

Get PDF metadata

python3 -c "
import json
import PyPDF2

# Report selected document-information fields plus the page count as JSON.
with open('input.pdf', 'rb') as pdf_file:
    reader = PyPDF2.PdfReader(pdf_file)
    meta = reader.metadata
    fields = ('title', 'author', 'subject', 'creator')
    # Every field falls back to None when the metadata object is falsy.
    info = {name: (getattr(meta, name) if meta else None) for name in fields}
    info['pages'] = len(reader.pages)
    print(json.dumps(info, indent=2, default=str))
"

Convert PDF to text

python3 -c "
import PyPDF2

# Join the text of every page, separated by a blank line, into output.txt.
with open('input.pdf', 'rb') as f:
    reader = PyPDF2.PdfReader(f)
    # Substitute an empty string when extract_text() returns a falsy value.
    pages = [page.extract_text() or '' for page in reader.pages]
# Write UTF-8 explicitly: the platform default encoding (for example cp1252
# on Windows) cannot represent every character that PDF text may contain.
with open('output.txt', 'w', encoding='utf-8') as out:
    out.write('\n\n'.join(pages))
print('Converted to output.txt')
"

Merge PDFs

python3 -c "
import sys
from PyPDF2 import PdfMerger

# The input PDFs are the arguments after the -c script, merged in that order.
inputs = sys.argv[1:]
merger = PdfMerger()
for path in inputs:
    merger.append(path)
merger.write('merged.pdf')
merger.close()
print(f'Merged {len(inputs)} files into merged.pdf')
" file1.pdf file2.pdf file3.pdf

Split PDF pages

python3 -c "
import PyPDF2

# Write each page of input.pdf to its own file: page_001.pdf, page_002.pdf, ...
with open('input.pdf', 'rb') as f:
    reader = PyPDF2.PdfReader(f)
    page_total = 0
    for page_total, page in enumerate(reader.pages, start=1):
        writer = PyPDF2.PdfWriter()
        writer.add_page(page)
        out_path = f'page_{page_total:03d}.pdf'
        with open(out_path, 'wb') as out:
            writer.write(out)
        print(f'Wrote {out_path}')
# Report the counter instead of querying the reader: its file is closed here.
print(f'Split into {page_total} files')
"

Extract images from PDF

python3 -c "
import PyPDF2, os

# Save every embedded image of input.pdf into the extracted_images directory.
os.makedirs('extracted_images', exist_ok=True)
with open('input.pdf', 'rb') as f:
    reader = PyPDF2.PdfReader(f)
    img_count = 0
    for page_num, page in enumerate(reader.pages, start=1):
        # Iterating page.images yields the image objects themselves (each
        # with .name and .data); they are not keys to index back with.
        for img in page.images:
            img_count += 1
            # Keep the extension of the image name, defaulting to .png.
            ext = os.path.splitext(img.name)[1] or '.png'
            out_path = f'extracted_images/page{page_num}_img{img_count}{ext}'
            with open(out_path, 'wb') as out:
                out.write(img.data)
            print(f'Extracted {out_path}')
print(f'Total images extracted: {img_count}')
"

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

General

local-whisper

No summary provided by upstream source.

Repository Source · Needs Review
General

kagi-search

No summary provided by upstream source.

Repository Source · Needs Review
General

feishu-bridge

No summary provided by upstream source.

Repository Source · Needs Review
General

bambu-local

No summary provided by upstream source.

Repository Source · Needs Review