Skip to content
This repository has been archived by the owner on Jun 2, 2018. It is now read-only.

Commit

Permalink
feat: support content parser
Browse files Browse the repository at this point in the history
  • Loading branch information
azu committed Oct 21, 2017
1 parent af9bbf5 commit 6b27430
Show file tree
Hide file tree
Showing 8 changed files with 1,052 additions and 207 deletions.
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,18 @@ const Category = require("jser-item-category-parser").Category;
const results = parse(fs.readFileSync(path.join(__dirname, "fixtures", "2017-01-11-Node.js-v7.4.0-npm-v4-PhantomJS-2.5.0-Beta-clean-code.md"), "utf-8"));
/*
[{
category: 'Headline',
url: 'https://medium.com/inferno-js/introducing-inferno-1-0-f3da5c4e773b'
}]
category: "Headline",
url: "https://groups.google.com/d/topic/phantomjs/AefOuwkgBh0",
tags: ["browser", "ReleaseNote", "webkit"],
content: "PhantomJS 2.5.0βリリース。\nQtWebKitがアップデートされES2015のサポート、WebPのサポートなど",
title: "[Release] PhantomJS 2.5.0 Beta - Google グループ",
relatedLinks: [
{
title: "Comparison with QtWebKit 5.6 · annulen/webkit Wiki",
url: "https://github.com/annulen/webkit/wiki/Comparison-with-QtWebKit-5.6"
}
]
}]
*/
```
Expand Down
23 changes: 20 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,16 @@
],
"name": "jser-item-category-parser",
"version": "1.0.5",
"description": "JSer.info post item category parser.",
"content": "JSer.info post item category parser.",
"main": "lib/index.js",
"scripts": {
"test": "mocha test/",
"build": "NODE_ENV=production babel src --out-dir lib --source-maps",
"watch": "babel src --out-dir lib --watch --source-maps",
"prepublish": "npm run --if-present build"
"prepublish": "npm run --if-present build",
"prettier": "prettier --write '**/*.{js,jsx,ts,tsx,css}'",
"precommit": "lint-staged",
"postcommit": "git reset"
},
"keywords": [
"jser"
Expand All @@ -36,14 +39,28 @@
"babel-preset-jsdoc-to-assert": "^4.0.0",
"babel-preset-power-assert": "^1.0.0",
"babel-register": "^6.22.0",
"husky": "^0.14.3",
"lint-staged": "^4.3.0",
"mocha": "^3.2.0",
"power-assert": "^1.4.2"
"power-assert": "^1.4.2",
"prettier": "^1.7.4"
},
"dependencies": {
"execall": "^1.0.0",
"lodash.difference": "^4.5.0",
"remark": "^6.2.0",
"unist-util-find-all-after": "^1.0.0",
"unist-util-is": "^2.0.0",
"unist-util-select": "^1.5.0"
},
"prettier": {
"printWidth": 120,
"tabWidth": 4
},
"lint-staged": {
"*.{js,jsx,ts,tsx,css}": [
"prettier --write",
"git add"
]
}
}
8 changes: 4 additions & 4 deletions src/category.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@ const Category = {
SlideVideo: "スライド、動画関係",
WebsiteDocument: "サイト、サービス、ドキュメント",
SoftwareLibrary: "ソフトウェア、ツール、ライブラリ関係",
Book: "書籍関係",
Book: "書籍関係"
};
// Identity map of the category keys: every property name maps to the same
// string, so callers can refer to a key symbolically (e.g. CategoryKey.Book)
// instead of repeating the string literal.
const CategoryKey = {
Headline: "Headline",
Article: "Article",
SlideVideo: "SlideVideo",
WebsiteDocument: "WebsiteDocument",
SoftwareLibrary: "SoftwareLibrary",
Book: "Book",
Book: "Book"
};
// Alternative display labels, keyed by the same category keys as Category.
// At least one label differs from Category (WebsiteDocument has a shorter
// label here), which lets a lookup by label fall back to this table when the
// primary Category label is not found in a heading.
const CompatibleCategory = {
Headline: "ヘッドライン",
Article: "アーティクル",
SlideVideo: "スライド、動画関係",
WebsiteDocument: "サイト、サービス",
SoftwareLibrary: "ソフトウェア、ツール、ライブラリ関係",
Book: "書籍関係",
Book: "書籍関係"
};
module.exports.Category = Category;
module.exports.CategoryKey = CategoryKey;
module.exports.CompatibleCategory = CompatibleCategory;
module.exports.CompatibleCategory = CompatibleCategory;
132 changes: 132 additions & 0 deletions src/content-parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// MIT © 2017 azu
"use strict";
const execall = require("execall");

/**
 * Mutable record describing a single parsed item.
 *
 * A fresh instance starts with every scalar field `undefined` and every
 * collection field as its own empty array (never shared between instances).
 */
export class CurrentContent {
    constructor() {
        // NOTE: no date field is recorded on the item.
        Object.assign(this, {
            title: undefined,
            url: undefined,
            tags: [],
            content: undefined,
            relatedLinks: []
        });
    }
}

/**
 * Step-by-step parser that converts a flat list of markdown AST nodes into
 * item records (`CurrentContent` instances) collected in `this.contents`.
 *
 * Each item is expected to be laid out as the node sequence described by
 * `processPattern`: thematic break, depth-2 heading, link paragraph, tag html,
 * content paragraph, then an optional second paragraph and an optional list of
 * related links.
 */
export class ContentParser {
    /**
     * Sentinel values a step may return.
     * `SKIP` means "this optional step does not apply to the node; try the
     * next step against the same node".
     */
    get MARK() {
        return {
            SKIP: "SKIP"
        };
    }

    constructor() {
        // Completed items, in document order.
        this.contents = [];
        // Item currently being filled in by the steps of `processPattern`.
        this.currentContent = new CurrentContent();
    }

    /**
     * Walk `nodeList`, applying the steps of `processPattern` in order and
     * appending every completed item to `this.contents`.
     *
     * @param {Array<Object>} nodeList AST nodes; each needs `type` and, for
     *   heading/content paragraphs, `position.{start,end}.offset`
     * @param {string} text source text that the node offsets index into
     * @throws {Error} when a mandatory step meets a node of the wrong type
     */
    process(nodeList, text) {
        const steps = this.processPattern;
        let stepIndex = 0;
        let nodeIndex = 0;
        while (nodeIndex < nodeList.length) {
            const result = steps[stepIndex](nodeList[nodeIndex], text);
            // A skipped (optional) step keeps the same node for the next step;
            // any other result means the node was consumed.
            if (result !== this.MARK.SKIP) {
                nodeIndex++;
            }
            stepIndex++;
            if (stepIndex === steps.length) {
                this.contents.push(this.currentContent);
                this.currentContent = new CurrentContent();
                stepIndex = 0;
            }
        }
        // The node list can end before the optional trailing steps are
        // reached. `content` is assigned by the last mandatory step, so when
        // it is set the pending item is complete and must not be dropped.
        if (this.currentContent.content !== undefined) {
            this.contents.push(this.currentContent);
            this.currentContent = new CurrentContent();
        }
    }

    /**
     * Ordered list of step functions `(node, text) => void | MARK.SKIP`.
     *
     * Example of one item in the source markdown:
     *
     *     ## StealJS 1.0 Release
     *     [www.bitovi.com/blog/stealjs-1.0-release](https://www.bitovi.com/blog/stealjs-1.0-release "StealJS 1.0 Release")
     *     <p class="jser-tags jser-tag-icon"><span class="jser-tag">JavaScript</span> <span class="jser-tag">Tools</span></p>
     *     (one or two paragraphs of free text describing the item)
     *     - [Easy ES6 with StealJS - YouTube](https://www.youtube.com/watch?v=VKydmxRm6w8 "Easy ES6 with StealJS - YouTube")
     */
    get processPattern() {
        // Put the offending node type into the message; a second argument to
        // `Error` would not be shown to the reader.
        const fail = (message, node) => {
            throw new Error(`${message}, but got "${node.type}"`);
        };
        const sliceNode = (node, text) => text.slice(node.position.start.offset, node.position.end.offset);
        return [
            // Separator that opens every item.
            node => {
                if (node.type !== "thematicBreak") {
                    fail("should start thematicBreak", node);
                }
            },
            // Title: "## StealJS 1.0 Release"
            (node, text) => {
                // Reject anything that is not exactly a depth-2 heading.
                if (node.type !== "heading" || node.depth !== 2) {
                    fail("should start heading", node);
                }
                this.currentContent.title = sliceNode(node, text).replace(/^## /, "");
            },
            // URL: a paragraph whose first child is the item's link.
            node => {
                if (node.type !== "paragraph") {
                    fail("should start paragraph", node);
                }
                const link = node.children[0];
                if (!link || link.type !== "link") {
                    fail("should link", node);
                }
                this.currentContent.url = link.url;
            },
            // Tags: raw html holding <span class="jser-tag">…</span> elements.
            node => {
                if (node.type !== "html") {
                    fail("should start html", node);
                }
                const tagPattern = /<span class="jser-tag">(.*?)<\/span>/g;
                this.currentContent.tags = execall(tagPattern, node.value).map(match => match.sub[0]);
            },
            // Content: first (mandatory) body paragraph.
            (node, text) => {
                if (node.type !== "paragraph") {
                    fail("should has body paragraph", node);
                }
                this.currentContent.content = sliceNode(node, text);
            },
            // Some items have a second body paragraph; it is appended to the content.
            (node, text) => {
                if (node.type !== "paragraph") {
                    return this.MARK.SKIP;
                }
                this.currentContent.content += "\n\n" + sliceNode(node, text);
            },
            // Optional list of related links: "- [title](url)"
            node => {
                if (node.type !== "list") {
                    return this.MARK.SKIP;
                }
                node.children.forEach(listItem => {
                    const paragraph = listItem.children[0];
                    const link = paragraph.children[0];
                    const title = link.children[0].value;
                    this.currentContent.relatedLinks.push({
                        title,
                        url: link.url
                    });
                });
            }
        ];
    }
}
2 changes: 1 addition & 1 deletion src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ const CategoryKey = require("./category").CategoryKey;
const parse = require("./parse");
module.exports.parse = parse;
module.exports.Category = Category;
module.exports.CategoryKey = CategoryKey;
module.exports.CategoryKey = CategoryKey;
51 changes: 24 additions & 27 deletions src/parse.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
// MIT © 2017 azu
"use strict";
import { ContentParser } from "./content-parser";

const remarkAbstract = require("remark");
const remark = remarkAbstract();
const findAllAfter = require('unist-util-find-all-after');
const difference = require('lodash.difference');
const select = require('unist-util-select');
const is = require('unist-util-is');
const findAllAfter = require("unist-util-find-all-after");
const difference = require("lodash.difference");
const select = require("unist-util-select");
const is = require("unist-util-is");
const Category = require("./category").Category;
const CompatibleCategory = require("./category").CompatibleCategory;

Expand All @@ -21,20 +23,20 @@ const betweenNodes = (parent, start, end) => {
if (!end) {
return nodesAfter;
}
const nodesIgnores = findAllAfter(parent, end);
const nodesIgnores = [end].concat(findAllAfter(parent, end));
return difference(nodesAfter, nodesIgnores, (nodeA, nodeB) => {
return is(nodeA, nodeB);
});
};
const getGroupKey = (htmlNode) => {
const getGroupKey = htmlNode => {
const value = htmlNode.value;
const [matchKey] = Object.keys(Category).filter(key => {
return value.indexOf(Category[key]) !== -1;
});
if (matchKey !== undefined) {
return matchKey;
}
const [compatibleMatchKey] = Object.keys(Category).filter(key => {
const [compatibleMatchKey] = Object.keys(Category).filter(key => {
return value.indexOf(CompatibleCategory[key]) !== -1;
});
if (compatibleMatchKey) {
Expand All @@ -46,36 +48,31 @@ const getGroupKey = (htmlNode) => {
* @param {string} content
* @returns {[*]}
*/
module.exports = function (content) {
module.exports = function(content) {
const AST = remark.parse(content);
const allCategory = select(AST, 'html[value*=<h1]');
const allLinks = select(AST, 'heading ~ paragraph > link');
const allCategory = select(AST, "html[value*=<h1]");
const allLinks = select(AST, "heading ~ paragraph > link");
const results = [];
allCategory.forEach((categoryNode, index) => {
const nextCategoryNode = allCategory[index + 1];
const currentCategory = getGroupKey(categoryNode);
// not found category
if(currentCategory === null) {
if (currentCategory === null) {
return;
}
const currentCategoryNodes = betweenNodes(AST, categoryNode, nextCategoryNode);
currentCategoryNodes.forEach(categoryNode => {
const targetLinkNodes = select(categoryNode, 'link');
if (targetLinkNodes.length === 0) {
return;
}
const targetLinkNode = targetLinkNodes[0];
// if this node is currentCategory, add results as currentCategory node
allLinks.forEach(linkNode => {
const isLinkNodeCurrentCategory = is(targetLinkNode, linkNode);
if (isLinkNodeCurrentCategory) {
results.push({
category: currentCategory,
url: linkNode.url
});
}
const contentParser = new ContentParser();
contentParser.process(currentCategoryNodes, content);
contentParser.contents.forEach(content => {
results.push({
category: currentCategory,
title: content.title,
url: content.url,
tags: content.tags,
content: content.content,
relatedLinks: content.relatedLinks
});
});
});
return results;
};
};
Loading

0 comments on commit 6b27430

Please sign in to comment.