WordPress · psrpinto · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024
diff --git a/src/app.ts b/src/app.ts
@@ -1,5 +1,5 @@
 import { initPlayground } from './playground';
 import { PlaygroundClient } from '@wp-playground/client';

 declare global {
 	interface Window {
@@ -10,11 +10,11 @@
 const iframeId = 'playground';
 const iframe = document.getElementById( iframeId ) as HTMLIFrameElement;
 
-initPlayground( iframeId )
-	.then( ( playground ) => {
-		window.playground = playground;
-	} )
-	.catch( ( err ) => console.error( err ) );
+// initPlayground( iframeId )
+// 	.then( ( playground ) => {
+// 		window.playground = playground;
+// 	} )
+// 	.catch( ( err ) => console.error( err ) );
 
 const relayToPlayground = function ( response: any ) {
 	console.log( response, chrome.runtime.lastError );

diff --git a/src/content.ts b/src/content.ts
@@ -1,6 +1,29 @@
 // Constants
+import { findExtractors } from './extractor/registry';
+import { DOMSource } from './extractor/source';
+
 const MESSAGE_NAMESPACE = 'TRY_WORDPRESS';
 
+const source = new DOMSource( document );
+
+// Run the single Extractor that supports this page. Failures are logged
+// instead of thrown: an uncaught top-level error would abort this script
+// before the code below (e.g. wpInsertPost) is ever defined.
+( async () => {
+	const extractors = findExtractors( source );
+	if ( extractors.length !== 1 ) {
+		const none = extractors.length === 0;
+		console.error( none ? 'No extractor was found' : 'Multiple extractors were found' );
+		return;
+	}
+
+	const extractor = extractors[ 0 ];
+	console.log( `Found extractor ${ extractor.info().slug }` );
+	// For now each extracted entry is only logged.
+	await extractor.extractData( source, ( entry ) => console.log( entry ) );
+	console.log( 'Extraction finished' );
+} )().catch( ( err ) => console.error( err ) );
+
 const wpInsertPost = ( data: any ) => {
 	data.post_status = 'publish';
 	let code = "<?php require_once 'wordpress/wp-load.php';\n";

diff --git a/src/extractor/extractor.ts b/src/extractor/extractor.ts
@@ -0,0 +1,48 @@
+import { SourceData, SourceInfo, Source } from './source';
+
+/**
+ * Descriptive metadata about an Extractor, as returned by Extractor.info().
+ */
+export interface ExtractorInfo {
+	/**
+	 * Unique identifier of the Extractor, e.g. "wordpress-rest".
+	 * Must be a lower-case string.
+	 * There must not be more than one Extractor with the same slug.
+	 */
+	slug: string;
+
+	/**
+	 * Human-readable title of the Extractor, e.g. "WordPress".
+	 */
+	title: string;
+
+	/**
+	 * Description of the Extractor, e.g. "Extracts posts and pages from a WordPress site using the WordPress REST API".
+	 */
+	description: string;
+}
+
+// Contract every Extractor implements: describe itself, detect support, extract.
+export interface Extractor {
+	/** Returns information about the Extractor, such as its unique slug. */
+	info(): ExtractorInfo;
+
+	/**
+	 * Tells whether the Extractor supports a given Source.
+	 */
+	supports( source: Source ): boolean;
+
+	/**
+	 * Extracts information about the Source, like its title, language, etc.
+	 */
+	extractInfo( source: Source ): Promise< SourceInfo >;
+
+	/**
+	 * Extracts data from a given Source.
+	 * The callback is handed SourceData; presumably once per extracted entry (TODO confirm).
+	 */
+	extractData(
+		source: Source,
+		callback: ( siteData: SourceData ) => void
+	): Promise< void >;
+}
diff --git a/src/extractor/registry.ts b/src/extractor/registry.ts
@@ -0,0 +1,39 @@
+import { Extractor } from './extractor';
+import { WordPressRestExtractor } from './wordpress-rest';
+import { Source } from './source';
+
+const extractors = new Map< string, Extractor >();
+// Register the built-in Extractors as soon as this module is loaded.
+registerExtractor( new WordPressRestExtractor() );
+
+/**
+ * Returns every registered Extractor whose supports() accepts the Source.
+ *
+ * @param source The Source to find Extractors for.
+ * @return Matching Extractors in registration order; empty when none match.
+ */
+export function findExtractors( source: Source ): Extractor[] {
+	// Map#values() iterates in insertion order, like the Map itself.
+	return Array.from( extractors.values() ).filter( ( candidate ) =>
+		candidate.supports( source )
+	);
+}
+
+/**
+ * Adds an Extractor to the registry, keyed by its slug.
+ *
+ * @param extractor The Extractor to register.
+ * @throws Error when the slug is not lower-case or is already registered.
+ */
+function registerExtractor( extractor: Extractor ) {
+	const { slug } = extractor.info();
+
+	const isLowerCase = slug === slug.toLowerCase();
+	if ( ! isLowerCase ) {
+		throw new Error( `The Extractor's slug must be a sequence of lower-case characters, got '${ slug }'` );
+	}
+	if ( extractors.has( slug ) ) {
+		throw new Error( `An Extractor with slug ${ slug } is already registered` );
+	}
+	extractors.set( slug, extractor );
+}
diff --git a/src/extractor/source.ts b/src/extractor/source.ts
@@ -0,0 +1,46 @@
+/**
+ * Source of data to be extracted, like a DOM document, a URL or any other kind of resource.
+ * For the moment, only a DOM Document is supported (see the DOMSource subclass).
+ */
+export abstract class Source {
+	abstract resource(): any; // Underlying resource; its concrete type is chosen by each subclass.
+}
+
+/**
+ * Source implementation that wraps a DOM Document instance.
+ * The Document is fixed at construction time and exposed through resource().
+ */
+export class DOMSource extends Source {
+	// Parameter property: declares the private field and assigns it in one step.
+	constructor( private readonly document: Document ) {
+		super();
+	}
+
+	/** Returns the Document this Source was constructed with. */
+	resource(): Document {
+		return this.document;
+	}
+}
+
+/**
+ * Information about the Source to be extracted, as produced by Extractor.extractInfo().
+ */
+export interface SourceInfo {
+	/**
+	 * The site's title.
+	 */
+	title: string;
+}
+
+/**
+ * A piece of data in the Source under extraction, like a post or a page.
+ */
+export interface SourceData {
+	/**
+	 * Slug of the Extractor which extracted this data.
+	 * This is set automatically; the Extractor does not need to set it.
+	 */
+	extractor: string;
+	title: string; // Presumably the title of the extracted post or page (TODO confirm).
+	content: string; // Its content; the expected markup/format is not defined here.
+}
diff --git a/src/extractor/wordpress-rest.ts b/src/extractor/wordpress-rest.ts
@@ -0,0 +1,52 @@
+import { Extractor, ExtractorInfo } from './extractor';
+import { DOMSource, Source, SourceData, SourceInfo } from './source';
+
+export class WordPressRestExtractor implements Extractor {
+	/**
+	 * Describes this Extractor; the slug identifies it among other Extractors.
+	 */
+	info(): ExtractorInfo {
+		const slug = 'wordpress-rest';
+		const title = 'WordPress REST API';
+		const description =
+			'Extracts posts and pages from a WordPress site using the WordPress REST API';
+		return { slug, title, description };
+	}
+
+	/**
+	 * Tells whether the Source looks like a WordPress post or page.
+	 * Only DOM-backed Sources qualify. For `article.post` and `article.page`,
+	 * the first matching element is inspected; the Source is supported when
+	 * that element's class attribute contains `post-<id>`.
+	 */
+	supports( source: Source ): boolean {
+		if ( ! ( source instanceof DOMSource ) ) {
+			return false;
+		}
+
+		const dom = source.resource();
+		const postIdClass = /post-(\d+)/;
+		return [ 'article.post', 'article.page' ].some( ( selector ) => {
+			const article = dom.querySelector( selector );
+			return article !== null && article.className.match( postIdClass ) !== null;
+		} );
+	}
+
+	/**
+	 * Placeholder: resolves with a hard-coded title until implemented.
+	 */
+	async extractInfo( source: Source ): Promise< SourceInfo > {
+		// TODO.
+		return { title: 'Foo' };
+	}
+
+	/**
+	 * Placeholder: resolves without calling the callback until implemented.
+	 */
+	async extractData(
+		source: Source,
+		callback: ( entry: SourceData ) => void
+	): Promise< void > {
+		// TODO.
+	}
+}
diff --git a/tsconfig.json b/tsconfig.json
@@ -3,7 +3,7 @@
         "outDir": "./build/typescript",
         "noImplicitAny": true,
         "module": "es6",
-        "target": "es5",
+        "target": "es6",
         "jsx": "react",
         "allowJs": true,
         "moduleResolution": "node"