semantic-search

Semantic Search Skill

Safety Notice

This listing is imported from skills.sh public index metadata. Review upstream SKILL.md and repository scripts before running.

Copy the command below and send it to your AI assistant so it can learn this skill

Install skill "semantic-search" with this command: npx skills add winsorllc/upgraded-carnival/winsorllc-upgraded-carnival-semantic-search

Semantic Search Skill

Search through files and directories for content using keyword matching and basic semantic analysis.

When to Use

✅ USE this skill when:

  • Finding code that implements a feature

  • Searching documentation for topics

  • Locating files by their content

  • Finding similar code patterns

  • Researching codebase structure

When NOT to Use

❌ DON'T use this skill when:

  • Searching binary files → use file tools

  • Exact regex patterns → use grep

  • Searching very large repos (>100k files) → use indexed search

Installation

cd /job && npm install natural compromise

Features

  • Keyword Search: Simple text matching across files

  • Stemming: Matches regular word variations (run, runs, running); irregular forms such as "ran" are not conflated by the Porter stemmer

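  • Relevance Scoring: Ranks results by the fraction of a file's stemmed tokens that match the query (a term-frequency helper is included; no IDF weighting is applied)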

  • File Filtering: Filter by extension, path patterns

  • Context: Shows surrounding lines for each match

Usage

Basic Search

const { searchFiles } = require('./semantic-search');

// This snippet loads the module with `require`, i.e. it is CommonJS, where
// `await` is only valid inside an async function. The call is therefore
// wrapped in `main()` instead of being awaited at the top level.
// NOTE(review): this example passes the query inside the options object,
// while the SemanticSearcher#searchFiles method in the "Node.js
// Implementation" section takes it as the second positional argument —
// confirm which signature './semantic-search' actually exports.
async function main() {
  const results = await searchFiles('.', {
    query: 'authentication middleware',
    extensions: ['.js', '.ts'],
    maxResults: 20,
  });

  console.log(results);
}

main().catch((err) => {
  // Surface the failure and signal it through the exit code.
  console.error(err);
  process.exitCode = 1;
});

Advanced Search

// Advanced search: custom directory exclusions, more file types, wider
// context around each hit, a larger result cap and a stricter score cut-off.
// Run this inside an async function when the surrounding file is CommonJS.
const advancedOptions = {
  query: 'error handling database',
  excludeDirs: ['node_modules', 'dist', '.git'],
  extensions: ['.js', '.ts', '.py'],
  contextLines: 3,
  maxResults: 50,
  minScore: 0.3,
};

const results = await searchFiles('/path/to/code', advancedOptions);

Node.js Implementation

const fs = require('fs'); const path = require('path'); const natural = require('natural');

/**
 * Keyword-based file searcher.
 *
 * Text is lower-cased, tokenized and stemmed; a file's score is the fraction
 * of its tokens that also occur in the query, so scores lie in [0, 1].
 */
class SemanticSearcher {
  /**
   * @param {object} [options]
   * @param {number} [options.maxFileSize=1048576] Files larger than this many
   *   bytes are skipped while walking the tree.
   * @param {string[]} [options.excludeDirs] Directory names that are never
   *   descended into. Replaces the default list when given.
   */
  constructor(options = {}) {
    this.stemmer = natural.PorterStemmer;
    this.tokenizer = new natural.WordTokenizer();
    // `??` rather than `||` so that an explicit 0 is respected.
    this.maxFileSize = options.maxFileSize ?? 1024 * 1024; // 1 MB
    this.excludeDirs = options.excludeDirs ?? [
      'node_modules',
      'dist',
      'build',
      '.git',
      'vendor',
      '__pycache__',
      '.next',
      '.nuxt',
    ];
  }

  /**
   * Lower-cases `text`, splits it into words and stems every word.
   *
   * @param {string} text Text to tokenize; null/undefined is treated as ''.
   * @returns {string[]} Stemmed tokens in document order.
   */
  tokenize(text) {
    const lowered = String(text ?? '').toLowerCase();
    // Defensive: treat a missing tokenizer result as "no tokens".
    const words = this.tokenizer.tokenize(lowered) || [];
    return words.map((word) => this.stemmer.stem(word));
  }

  /**
   * Computes term frequencies normalized by the most frequent term.
   *
   * @param {string[]} tokens
   * @returns {Object<string, number>} Map of token to frequency in (0, 1];
   *   an empty object for empty input.
   */
  calculateTF(tokens) {
    // Count in a Map: on a plain object, tokens such as "constructor" would
    // collide with inherited Object.prototype members and corrupt the count.
    const counts = new Map();
    let maxFreq = 0;
    for (const token of tokens) {
      const count = (counts.get(token) ?? 0) + 1;
      counts.set(token, count);
      if (count > maxFreq) maxFreq = count;
    }

    return Object.fromEntries(
      [...counts].map(([token, count]) => [token, count / maxFreq]),
    );
  }

  /**
   * Scores a document as the share of its tokens that appear in the query.
   *
   * @param {string[]} queryTokens
   * @param {string[]} docTokens
   * @returns {number} Value in [0, 1]; 0 for an empty document.
   */
  scoreDocument(queryTokens, docTokens) {
    if (docTokens.length === 0) return 0;

    const wanted = new Set(queryTokens);
    const hits = docTokens.reduce(
      (total, token) => (wanted.has(token) ? total + 1 : total),
      0,
    );
    return hits / docTokens.length;
  }

  /**
   * Searches every eligible file under `rootDir` and returns the best matches.
   *
   * @param {string} rootDir Directory to search recursively.
   * @param {string} query Free-text query.
   * @param {object} [options]
   * @param {string[]} [options.extensions] Only files whose name ends with
   *   one of these suffixes are searched; all files when omitted.
   * @param {string[]} [options.excludeDirs] Per-call override of the
   *   instance's excluded directory names.
   * @param {number} [options.minScore=0.1] Files must score strictly above
   *   this value to be reported.
   * @param {number} [options.contextLines=2] Lines of context kept on each
   *   side of a matching line.
   * @param {number} [options.maxResults=20] Maximum number of files returned.
   * @param {function(Error, string): void} [options.onError] Called with the
   *   error and file path when a file cannot be read; the file is skipped
   *   either way.
   * @returns {Promise<Array<{file: string, score: string, matches: object[],
   *   totalLines: number}>>} Results sorted by descending score. `file` is
   *   relative to `rootDir`; `score` is formatted with three decimals.
   */
  async searchFiles(rootDir, query, options = {}) {
    // `??` keeps explicit zeros (e.g. minScore: 0, contextLines: 0).
    const minScore = options.minScore ?? 0.1;
    const contextLines = options.contextLines ?? 2;
    const maxResults = options.maxResults ?? 20;

    const queryTokens = this.tokenize(query);
    const files = await this.walkDirectory(rootDir, options);
    const scored = [];

    for (const file of files) {
      let content;
      try {
        content = await fs.promises.readFile(file, 'utf-8');
      } catch (err) {
        // Best effort: an unreadable file must not abort the whole search.
        // Only the read is guarded so that scoring bugs are not hidden.
        options.onError?.(err, file);
        continue;
      }

      const score = this.scoreDocument(queryTokens, this.tokenize(content));
      if (score <= minScore) continue;

      // Accept both LF and CRLF line endings.
      const lines = content.split(/\r?\n/);
      scored.push({
        file: path.relative(rootDir, file),
        score,
        matches: this.findMatchingLines(lines, queryTokens, contextLines),
        totalLines: lines.length,
      });
    }

    // Rank on the raw number, then format for output.
    return scored
      .sort((a, b) => b.score - a.score)
      .slice(0, maxResults)
      .map(({ file, score, matches, totalLines }) => ({
        file,
        score: score.toFixed(3),
        matches,
        totalLines,
      }));
  }

  /**
   * Recursively collects the paths of files eligible for searching.
   *
   * @param {string} dir Directory to start from.
   * @param {object} [options]
   * @param {string[]} [options.extensions] Allowed file-name suffixes.
   * @param {string[]} [options.excludeDirs] Directory names to skip; defaults
   *   to the instance's `excludeDirs`.
   * @returns {Promise<string[]>} Paths of regular files no larger than
   *   `this.maxFileSize` bytes.
   */
  async walkDirectory(dir, options = {}) {
    const extensions = options.extensions ?? null;
    const excluded = new Set(options.excludeDirs ?? this.excludeDirs);
    const files = [];

    // Arrow function so that `this` stays bound to the searcher on every
    // level of the recursion.
    const walk = async (currentDir) => {
      const entries = await fs.promises.readdir(currentDir, {
        withFileTypes: true,
      });

      for (const entry of entries) {
        const entryPath = path.join(currentDir, entry.name);

        if (entry.isDirectory()) {
          if (!excluded.has(entry.name)) await walk(entryPath);
          continue;
        }
        if (!entry.isFile()) continue;
        if (extensions && !extensions.some((ext) => entry.name.endsWith(ext))) {
          continue;
        }

        const { size } = await fs.promises.stat(entryPath);
        if (size <= this.maxFileSize) files.push(entryPath);
      }
    };

    await walk(dir);
    return files;
  }

  /**
   * Finds the first lines that contain at least one query token.
   *
   * @param {string[]} lines Lines of the document.
   * @param {string[]} queryTokens Stemmed query tokens.
   * @param {number} [contextLines=2] Lines of context on each side.
   * @returns {Array<{lineNumber: number, content: string, context: string,
   *   matchScore: number}>} At most 10 matches in document order;
   *   `lineNumber` is 1-based and `matchScore` counts matching tokens.
   */
  findMatchingLines(lines, queryTokens, contextLines = 2) {
    const maxMatches = 10;
    const wanted = new Set(queryTokens);
    const matches = [];

    // Stop as soon as the cap is reached instead of tokenizing every line.
    for (let index = 0; index < lines.length && matches.length < maxMatches; index += 1) {
      const line = lines[index];
      const matchCount = this.tokenize(line).filter((token) => wanted.has(token)).length;
      if (matchCount === 0) continue;

      const start = Math.max(0, index - contextLines);
      const end = Math.min(lines.length, index + contextLines + 1);

      matches.push({
        lineNumber: index + 1,
        content: line.trim(),
        context: lines.slice(start, end).join('\n'),
        matchScore: matchCount,
      });
    }

    return matches;
  }
}

// Usage
// Wrapped in an async function: top-level `await` is a syntax error in a
// CommonJS module, and this file loads its dependencies with `require`.
async function main() {
  const searcher = new SemanticSearcher();
  const results = await searcher.searchFiles('.', 'authentication', {
    extensions: ['.js', '.ts'],
    maxResults: 10,
  });

  console.log(JSON.stringify(results, null, 2));
}

main().catch((err) => {
  // Report the failure and signal it through the exit code.
  console.error(err);
  process.exitCode = 1;
});

Command Line Usage

Search for authentication code

node index.js search "auth middleware" --ext .js,.ts --max 10

Search with context

node index.js search "error handling" --context 5

Search specific directory

node index.js search "database" --dir src/

Output Format

{ "query": "authentication middleware", "totalMatches": 5, "results": [ { "file": "src/middleware/auth.js", "score": "0.847", "matches": [ { "lineNumber": 42, "content": "function authenticateUser(token) {", "context": "...", "matchScore": 3 } ] } ] }

Quick Tips

  • Use specific terms: "JWT validation" not just "auth"

  • Filter by file type: restrict the search with extensions such as ".js", since different file types tend to follow different patterns

  • Multiple words improve accuracy

  • Use camelCase terms for code search

Notes

  • Searches text files only

  • Case-insensitive matching

  • Stemming improves recall

  • Scores range from 0.0 to 1.0

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

General

vector-memory

No summary provided by upstream source.

Repository Source · Needs Review
General

model-router

No summary provided by upstream source.

Repository Source · Needs Review
General

rss-reader

No summary provided by upstream source.

Repository Source · Needs Review
General

video-frames

No summary provided by upstream source.

Repository Source · Needs Review