diff --git a/src/app.ts b/src/app.ts index 8aa4c607..aa0423b5 100644 --- a/src/app.ts +++ b/src/app.ts @@ -10,11 +10,11 @@ declare global { const iframeId = 'playground'; const iframe = document.getElementById( iframeId ) as HTMLIFrameElement; -initPlayground( iframeId ) - .then( ( playground ) => { - window.playground = playground; - } ) - .catch( ( err ) => console.error( err ) ); +// initPlayground( iframeId ) +// .then( ( playground ) => { +// window.playground = playground; +// } ) +// .catch( ( err ) => console.error( err ) ); const relayToPlayground = function ( response: any ) { console.log( response, chrome.runtime.lastError ); diff --git a/src/content.ts b/src/content.ts index 6d9b815f..c7594bab 100644 --- a/src/content.ts +++ b/src/content.ts @@ -1,6 +1,29 @@ // Constants +import { findExtractors } from './extractor/registry'; +import { DOMSource } from './extractor/source'; + const MESSAGE_NAMESPACE = 'TRY_WORDPRESS'; +const source = new DOMSource( document ); + +const extractors = findExtractors( source ); +if ( extractors.length === 0 ) { + throw new Error( 'No extractor was found' ); +} else if ( extractors.length > 1 ) { + throw new Error( 'Multiple extractors were found' ); +} + +const extractor = extractors[ 0 ]; +console.log( `Found extractor ${ extractor.info().slug }` ); + +extractor + .extractData( source, ( entry ) => { + // Do something with the entry. + console.log( entry ); + } ) + .then( () => console.log( 'Extraction finished' ) ) + .catch( ( err ) => console.log( err ) ); + const wpInsertPost = ( data: any ) => { data.post_status = 'publish'; let code = "; + + /** + * Extracts data from a given Document. 
+ */ + extractData( + source: Source, + callback: ( siteData: SourceData ) => void + ): Promise< void >; +} diff --git a/src/extractor/registry.ts b/src/extractor/registry.ts new file mode 100644 index 00000000..78d6f17a --- /dev/null +++ b/src/extractor/registry.ts @@ -0,0 +1,39 @@ +import { Extractor } from './extractor'; +import { WordPressRestExtractor } from './wordpress-rest'; +import { Source } from './source'; + +const extractors = new Map< string, Extractor >(); + +registerExtractor( new WordPressRestExtractor() ); + +/** + * Find Extractors that support a given Source. + */ +export function findExtractors( source: Source ): Extractor[] { + let matches: Extractor[] = []; + for ( let [ slug, extractor ] of extractors ) { + if ( extractor.supports( source ) ) { + matches.push( extractor ); + } + } + return matches; +} + +/** + * Register an Extractor. + */ +function registerExtractor( extractor: Extractor ) { + const slug = extractor.info().slug; + if ( slug.toLowerCase() !== slug ) { + throw new Error( + `The Extractor's slug must be a sequence of lower-case characters, got '${ slug }'` + ); + } + + if ( extractors.has( slug ) ) { + throw new Error( + `An Extractor with slug ${ slug } is already registered` + ); + } + extractors.set( slug, extractor ); +} diff --git a/src/extractor/source.ts b/src/extractor/source.ts new file mode 100644 index 00000000..f7bd64f4 --- /dev/null +++ b/src/extractor/source.ts @@ -0,0 +1,46 @@ +/** + * Source of data to be extracted, like a DOM document, a URL or any other kind of resource. + * For the moment, only DOM Document is supported. + */ +export abstract class Source { + abstract resource(): any; +} + +/** + * Source backed by a DOM document instance. 
+ */ +export class DOMSource extends Source { + private readonly document: Document; + + constructor( document: Document ) { + super(); + this.document = document; + } + + resource(): Document { + return this.document; + } +} + +/** + * Information about the Source to be extracted. + */ +export interface SourceInfo { + /** + * The site's title. + */ + title: string; +} + +/** + * A piece of data in the Source under extraction, like a post or a page. + */ +export interface SourceData { + /** + * Slug of the Extractor which extracted this data. + * This is set automatically; the Extractor does not need to set it. + */ + extractor: string; + title: string; + content: string; +} diff --git a/src/extractor/wordpress-rest.ts b/src/extractor/wordpress-rest.ts new file mode 100644 index 00000000..5e4c8427 --- /dev/null +++ b/src/extractor/wordpress-rest.ts @@ -0,0 +1,52 @@ +import { Extractor, ExtractorInfo } from './extractor'; +import { DOMSource, Source, SourceData, SourceInfo } from './source'; + +export class WordPressRestExtractor implements Extractor { + info(): ExtractorInfo { + return { + slug: 'wordpress-rest', + title: 'WordPress REST API', + description: + 'Extracts posts and pages from a WordPress site using the WordPress REST API', + }; + } + + supports( source: Source ): boolean { + if ( ! ( source instanceof DOMSource ) ) { + return false; + } + const document = source.resource(); + + const post = document.querySelector( 'article.post' ); + if ( post ) { + // Check if the CSS class matches `post-` followed by a numeric ID. + const matches = post.className.match( /post-(\d+)/ ); + if ( matches !== null ) { + return true; + } + } + + const page = document.querySelector( 'article.page' ); + if ( page ) { + // Check if the CSS class matches `post-` followed by a numeric ID. + const matches = page.className.match( /post-(\d+)/ ); + if ( matches !== null ) { + return true; + } + } + + return false; + } + + async extractInfo( source: Source ): Promise< SourceInfo > { + // TODO.
+ return { title: 'Foo' }; + } + + async extractData( + source: Source, + callback: ( entry: SourceData ) => void + ): Promise< void > { + // TODO. + } +} diff --git a/tsconfig.json b/tsconfig.json index 96bccc1e..9263f2de 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -3,7 +3,7 @@ "outDir": "./build/typescript", "noImplicitAny": true, "module": "es6", - "target": "es5", + "target": "es6", "jsx": "react", "allowJs": true, "moduleResolution": "node"