From 44c31e757902610dbc40a338f0cf12e860e8f371 Mon Sep 17 00:00:00 2001 From: Baoshuo Date: Fri, 20 Sep 2024 23:42:15 +0800 Subject: [PATCH 1/4] refactor: courses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 重构了选课列表解析,现在有 `ScheduleRules` 字段返回解析好的上课时间地点信息 --- course.go | 122 +++++++++++++++++++++++++++++++++++++++++++------ model.go | 38 +++++++++------ utils/utils.go | 30 ++++++++++++ 3 files changed, 164 insertions(+), 26 deletions(-) diff --git a/course.go b/course.go index f90e877..4d09078 100644 --- a/course.go +++ b/course.go @@ -1,6 +1,7 @@ package jwch import ( + "strconv" "strings" "github.com/west2-online/jwch/constants" @@ -73,21 +74,116 @@ func (s *Student) GetSemesterCourses(term, viewState, eventValidation string) ([ return nil, errno.HTMLParseError.WithMessage("get course info failed") } + // 解析上课时间、地点 + /* + 05-18 星期1:3-4节 铜盘A110 + 05-17 星期3:1-2节 铜盘A110 + 05-17 星期5:3-4节 铜盘A110 + */ + courseInfo8 := strings.Split(utils.InnerTextWithBr(info[8]), "\n") + scheduleRules := []CourseScheduleRule{} + + for i := 0; i < len(courseInfo8); i++ { + courseInfo8[i] = strings.TrimSpace(courseInfo8[i]) + + if courseInfo8[i] == "" { // 空行 + continue + } + + lineData := strings.Fields(courseInfo8[i]) + + if len(lineData) < 3 { + return nil, errno.HTMLParseError.WithMessage("get course info failed") + } + + if strings.Contains(lineData[0], "周") { // 处理整周的课程,比如军训 + /* + 03周 星期1 - 04周 星期7 + [0] 03周 + [1] 星期1 + [2] - + [3] 04周 + [4] 星期7 + */ + startWeek, _ := strconv.Atoi(strings.TrimSuffix(lineData[0], "周")) + endWeek, _ := strconv.Atoi(strings.TrimSuffix(lineData[3], "周")) + startWeekday, _ := strconv.Atoi(strings.TrimPrefix(lineData[1], "星期")) + endWeekday, _ := strconv.Atoi(strings.TrimPrefix(lineData[4], "星期")) + + /* + 目前对于这种课程的解析有两种猜测: + 1. 第3周的周一到第4周的周日 + 2. 第3周到第4周,每周的周一到周日 + 福uu客户端现在采用的是猜测2,所以现在先按照猜测2来解析 + */ + for i := startWeekday; i <= endWeekday; i++ { + scheduleRules = append(scheduleRules, CourseScheduleRule{ + Location: "", + StartClass: 1, + EndClass: 8, + StartWeek: startWeek, + EndWeek: endWeek, + Weekday: i, + Single: true, + Double: true, + }) + } + + continue + } + + /* + 08-16 星期5:7-8节 铜盘A508 + [0] 08-16 + [1] 星期5:7-8节 + [2] 铜盘A508 + */ + /* + 02-14 星期1:1-2节(双) 旗山西1-206 + [0] 02-14 + [1] 星期1:1-2节(双) + [2] 旗山西1-206 + */ + /* + 01-13 星期1:3-4节(单) 旗山西1-206 + [0] 01-13 + [1] 星期1:3-4节(单) + [2] 旗山西1-206 + */ + + weekInfo := strings.SplitN(lineData[0], "-", 2) // [8, 16] + dayInfo := strings.SplitN(lineData[1], ":", 2) // ["星期5", "7-8节"] or ["星期1", "1-2节(双)"] + classBasicInfo := strings.Split(dayInfo[1], "节") // ["7-8", ""] or ["1-2", "(双)"] + classInfo := strings.Split(classBasicInfo[0], "-") // ["7", "8"] + + scheduleRules = append(scheduleRules, CourseScheduleRule{ + Location: lineData[2], + StartClass: utils.SafeAtoi(classInfo[0]), + EndClass: utils.SafeAtoi(classInfo[1]), + StartWeek: utils.SafeAtoi(weekInfo[0]), + EndWeek: utils.SafeAtoi(weekInfo[1]), + Weekday: utils.SafeAtoi(strings.TrimPrefix(dayInfo[0], "星期")), + Single: !strings.Contains(classBasicInfo[1], "双"), + Double: !strings.Contains(classBasicInfo[1], "单"), + }) + } + // TODO: performance optimization res = append(res, &Course{ - Type: htmlquery.OutputHTML(info[0], false), - Name: htmlquery.OutputHTML(info[1], false), - Syllabus: "https://jwcjwxt2.fzu.edu.cn:81" + safeExtractRegex(`javascript:pop1\('(.*?)&`, safeExtractionValue(info[2], "a", "href", 0)), - LessonPlan: "https://jwcjwxt2.fzu.edu.cn:81" + safeExtractRegex(`javascript:pop1\('(.*?)&`, safeExtractionValue(info[2], "a", "href", 1)), - PaymentStatus: safeExtractionFirst(info[3], "font"), - Credits: safeExtractionFirst(info[4], "span"), - ElectiveType: utils.GetChineseCharacter(htmlquery.OutputHTML(info[5], false)), - ExamType: utils.GetChineseCharacter(htmlquery.OutputHTML(info[6], false)), - Teacher: htmlquery.OutputHTML(info[7], false), - Classroom: strings.TrimSpace(htmlquery.InnerText(info[8])), - ExamTime: strings.TrimSpace(htmlquery.InnerText(info[9])), - Remark: htmlquery.OutputHTML(info[10], false), - Adjust: htmlquery.OutputHTML(info[11], false), + Type: htmlquery.OutputHTML(info[0], false), + Name: htmlquery.OutputHTML(info[1], false), + Syllabus: "https://jwcjwxt2.fzu.edu.cn:81" + safeExtractRegex(`javascript:pop1\('(.*?)&`, safeExtractionValue(info[2], "a", "href", 0)), + LessonPlan: "https://jwcjwxt2.fzu.edu.cn:81" + safeExtractRegex(`javascript:pop1\('(.*?)&`, safeExtractionValue(info[2], "a", "href", 1)), + PaymentStatus: safeExtractionFirst(info[3], "font"), + Credits: safeExtractionFirst(info[4], "span"), + ElectiveType: utils.GetChineseCharacter(htmlquery.OutputHTML(info[5], false)), + ExamType: utils.GetChineseCharacter(htmlquery.OutputHTML(info[6], false)), + Teacher: htmlquery.OutputHTML(info[7], false), + ScheduleRules: scheduleRules, + RawScheduleRules: strings.Join(courseInfo8, "\n"), + ExamTime: strings.TrimSpace(htmlquery.InnerText(info[9])), + Remark: htmlquery.OutputHTML(info[10], false), + Adjust: htmlquery.OutputHTML(info[11], false), }) } diff --git a/model.go b/model.go index 0542c61..7998fc8 100644 --- a/model.go +++ b/model.go @@ -47,19 +47,31 @@ type Term struct { // 课程信息 type Course struct { - Type string `json:"type"` // 修读类别 - Name string `json:"name"` // 课程名称 - PaymentStatus string `json:"paymentstatus"` // 缴费状态 - Syllabus string `json:"syllabus"` // 课程大纲 - LessonPlan string `json:"lessonplan"` // 课程计划 - Credits string `json:"credit"` // 学分 - ElectiveType string `json:"electivetype"` // 选课类型 - ExamType string `json:"examtype"` // 考试类别 - Teacher string `json:"teacher"` // 任课教师 - Classroom string `json:"classroom"` // 上课时间地点 - ExamTime string `json:"examtime"` // 考试时间地点 - Remark string `json:"remark"` // 备注 - Adjust string `json:"adjust"` // 调课信息 + Type string `json:"type"` // 修读类别 + Name string `json:"name"` // 课程名称 + PaymentStatus string `json:"paymentstatus"` // 缴费状态 + Syllabus string `json:"syllabus"` // 课程大纲 + LessonPlan string `json:"lessonplan"` // 课程计划 + Credits string `json:"credit"` // 学分 + ElectiveType string `json:"electivetype"` // 选课类型 + ExamType string `json:"examtype"` // 考试类别 + Teacher string `json:"teacher"` // 任课教师 + ScheduleRules []CourseScheduleRule `json:"scheduleRules"` // 上课时间地点规则 + RawScheduleRules string `json:"rawScheduleRules"` // 上课时间地点(原始文本) + ExamTime string `json:"examtime"` // 考试时间地点 + Remark string `json:"remark"` // 备注 + Adjust string `json:"adjust"` // 调课信息 +} + +type CourseScheduleRule struct { + Location string `json:"location"` // 上课地点 + StartClass int `json:"startClass"` // 开始节数 + EndClass int `json:"endClass"` // 结束节数 + StartWeek int `json:"startWeek"` // 开始周 + EndWeek int `json:"endWeek"` // 结束周 + Weekday int `json:"weekday"` // 星期几 + Single bool `json:"single"` // 单周 (PS: 为啥不用 odd) + Double bool `json:"double"` // 双周 (PS: 为啥不用 even) } type Mark struct { diff --git a/utils/utils.go b/utils/utils.go index 12d1029..391327c 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -7,9 +7,11 @@ import ( "encoding/hex" "encoding/json" "fmt" + "golang.org/x/net/html" "net/http" "os" "reflect" + "strconv" ) func SaveData(filePath string, data []byte) error { @@ -108,3 +110,31 @@ func JSONUnmarshalFromFile(filePath string, v any) error { return json.Unmarshal(data, v) } + +func InnerTextWithBr(n *html.Node) string { + if n.Type == html.TextNode { + return n.Data + } + + if n.Type == html.ElementNode && n.Data == "br" { + return "\n" + } + + var buf bytes.Buffer + + for c := n.FirstChild; c != nil; c = c.NextSibling { + buf.WriteString(InnerTextWithBr(c)) + } + + return buf.String() +} + +func SafeAtoi(s string) int { + n, err := strconv.Atoi(s) + + if err != nil { + return 0 + } + + return n +} From b0f597ea464f3ecab82494bfa0cb396dfe4aa40d Mon Sep 17 00:00:00 2001 From: Baoshuo Date: Fri, 20 Sep 2024 23:54:28 +0800 Subject: [PATCH 2/4] chore: remove some unnecessary fields --- course.go | 10 +++------- model.go | 6 ------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/course.go b/course.go index 4d09078..8c75f96 100644 --- a/course.go +++ b/course.go @@ -170,20 +170,16 @@ func (s *Student) GetSemesterCourses(term, viewState, eventValidation string) ([ // TODO: performance optimization res = append(res, &Course{ - Type: htmlquery.OutputHTML(info[0], false), Name: htmlquery.OutputHTML(info[1], false), Syllabus: "https://jwcjwxt2.fzu.edu.cn:81" + safeExtractRegex(`javascript:pop1\('(.*?)&`, safeExtractionValue(info[2], "a", "href", 0)), LessonPlan: "https://jwcjwxt2.fzu.edu.cn:81" + safeExtractRegex(`javascript:pop1\('(.*?)&`, safeExtractionValue(info[2], "a", "href", 1)), - PaymentStatus: safeExtractionFirst(info[3], "font"), - Credits: safeExtractionFirst(info[4], "span"), - ElectiveType: utils.GetChineseCharacter(htmlquery.OutputHTML(info[5], false)), - ExamType: utils.GetChineseCharacter(htmlquery.OutputHTML(info[6], false)), Teacher: htmlquery.OutputHTML(info[7], false), ScheduleRules: scheduleRules, RawScheduleRules: strings.Join(courseInfo8, "\n"), - ExamTime: strings.TrimSpace(htmlquery.InnerText(info[9])), Remark: htmlquery.OutputHTML(info[10], false), - Adjust: htmlquery.OutputHTML(info[11], false), + // TODO: 解析调课 + // 06周 星期3:5-6节 调至 09周 星期1:7-8节 旗山西1-206 + Adjust: htmlquery.OutputHTML(info[11], false), }) } diff --git a/model.go b/model.go index 7998fc8..2b0694a 100644 --- a/model.go +++ b/model.go @@ -47,18 +47,12 @@ type Term struct { // 课程信息 type Course struct { - Type string `json:"type"` // 修读类别 Name string `json:"name"` // 课程名称 - PaymentStatus string `json:"paymentstatus"` // 缴费状态 Syllabus string `json:"syllabus"` // 课程大纲 LessonPlan string `json:"lessonplan"` // 课程计划 - Credits string `json:"credit"` // 学分 - ElectiveType string `json:"electivetype"` // 选课类型 - ExamType string `json:"examtype"` // 考试类别 Teacher string `json:"teacher"` // 任课教师 ScheduleRules []CourseScheduleRule `json:"scheduleRules"` // 上课时间地点规则 RawScheduleRules string `json:"rawScheduleRules"` // 上课时间地点(原始文本) - ExamTime string `json:"examtime"` // 考试时间地点 Remark string `json:"remark"` // 备注 Adjust string `json:"adjust"` // 调课信息 } From a7e9726cb8249699610bf65b2645e1172c4976ae Mon Sep 17 00:00:00 2001 From: Baoshuo Date: Sat, 21 Sep 2024 11:36:36 +0800 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20=E4=BF=AE=E6=AD=A3=E6=95=B4=E5=91=A8?= =?UTF-8?q?=E8=AF=BE=E7=A8=8B=E7=9A=84=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- course.go | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/course.go b/course.go index 8c75f96..b0e3892 100644 --- a/course.go +++ b/course.go @@ -110,20 +110,29 @@ func (s *Student) GetSemesterCourses(term, viewState, eventValidation string) ([ startWeekday, _ := strconv.Atoi(strings.TrimPrefix(lineData[1], "星期")) endWeekday, _ := strconv.Atoi(strings.TrimPrefix(lineData[4], "星期")) - /* - 目前对于这种课程的解析有两种猜测: - 1. 第3周的周一到第4周的周日 - 2. 第3周到第4周,每周的周一到周日 - 福uu客户端现在采用的是猜测2,所以现在先按照猜测2来解析 - */ - for i := startWeekday; i <= endWeekday; i++ { + for weekday := 1; weekday <= 7; weekday++ { + curStartWeek := startWeek + curEndWeek := endWeek + + if weekday < startWeekday { + curStartWeek++ + } + + if weekday > endWeekday { + curEndWeek-- + } + + if curStartWeek > curEndWeek { + continue + } + scheduleRules = append(scheduleRules, CourseScheduleRule{ Location: "", StartClass: 1, EndClass: 8, - StartWeek: startWeek, - EndWeek: endWeek, - Weekday: i, + StartWeek: curStartWeek, + EndWeek: curEndWeek, + Weekday: weekday, Single: true, Double: true, }) From 0f6275970fec4394b22b873084039de0c1600c65 Mon Sep 17 00:00:00 2001 From: Baoshuo Date: Sat, 21 Sep 2024 13:25:44 +0800 Subject: [PATCH 4/4] =?UTF-8?q?feat:=20=E8=A7=A3=E6=9E=90=E8=B0=83?= =?UTF-8?q?=E8=AF=BE=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- course.go | 148 ++++++++++++++++++++++++++++++++++++++++++++++++------ model.go | 16 +++++- 2 files changed, 149 insertions(+), 15 deletions(-) diff --git a/course.go b/course.go index b0e3892..a5aead8 100644 --- a/course.go +++ b/course.go @@ -1,6 +1,8 @@ package jwch import ( + "regexp" + "sort" "strconv" "strings" @@ -74,7 +76,42 @@ func (s *Student) GetSemesterCourses(term, viewState, eventValidation string) ([ return nil, errno.HTMLParseError.WithMessage("get course info failed") } - // 解析上课时间、地点 + // 解析调课信息 + /* + 06周 星期3:5-6节 调至 09周 星期1:7-8节 旗山西1-206 + */ + courseInfo11 := strings.Split(utils.InnerTextWithBr(info[11]), "\n") + adjustRegex := regexp.MustCompile(`(\d{2})周 星期(\d):(\d{1,2})-(\d{1,2})节[\s ]*调至[\s ]*(\d{2})周 星期(\d):(\d{1,2})-(\d{1,2})节[\s ]*(\S*)`) + adjustRules := []CourseAdjustRule{} + + for i := 0; i < len(courseInfo11); i++ { + courseInfo11[i] = strings.TrimSpace(courseInfo11[i]) + + if courseInfo11[i] == "" { // 空行 + continue + } + + adjustMatchArr := adjustRegex.FindStringSubmatch(courseInfo11[i]) + + if len(adjustMatchArr) < 10 { + return nil, errno.HTMLParseError.WithMessage("get course adjust failed") + } + + adjustRules = append(adjustRules, CourseAdjustRule{ + OldWeek: utils.SafeAtoi(adjustMatchArr[1]), + OldWeekday: utils.SafeAtoi(adjustMatchArr[2]), + OldStartClass: utils.SafeAtoi(adjustMatchArr[3]), + OldEndClass: utils.SafeAtoi(adjustMatchArr[4]), + + NewWeek: utils.SafeAtoi(adjustMatchArr[5]), + NewWeekday: utils.SafeAtoi(adjustMatchArr[6]), + NewStartClass: utils.SafeAtoi(adjustMatchArr[7]), + NewEndClass: utils.SafeAtoi(adjustMatchArr[8]), + NewLocation: adjustMatchArr[9], + }) + } + + // 解析上课时间、地点,融合调课信息 /* 05-18 星期1:3-4节 铜盘A110 05-17 星期3:1-2节 铜盘A110 @@ -135,6 +172,7 @@ func (s *Student) GetSemesterCourses(term, viewState, eventValidation string) ([ Weekday: weekday, Single: true, Double: true, + Adjust: false, }) } @@ -160,21 +198,105 @@ func (s *Student) GetSemesterCourses(term, viewState, eventValidation string) ([ [2] 旗山西1-206 */ + // 是不是用正则表达式更好一点? weekInfo := strings.SplitN(lineData[0], "-", 2) // [8, 16] dayInfo := strings.SplitN(lineData[1], ":", 2) // ["星期5", "7-8节"] or ["星期1", "1-2节(双)"] classBasicInfo := strings.Split(dayInfo[1], "节") // ["7-8", ""] or ["1-2", "(双)"] classInfo := strings.Split(classBasicInfo[0], "-") // ["7", "8"] + location := lineData[2] + startClass := utils.SafeAtoi(classInfo[0]) + endClass := utils.SafeAtoi(classInfo[1]) + startWeek := utils.SafeAtoi(weekInfo[0]) + endWeek := utils.SafeAtoi(weekInfo[1]) + weekDay := utils.SafeAtoi(strings.TrimPrefix(dayInfo[0], "星期")) + single := !strings.Contains(classBasicInfo[1], "双") + double := !strings.Contains(classBasicInfo[1], "单") + + if len(adjustRules) == 0 { + scheduleRules = append(scheduleRules, CourseScheduleRule{ + Location: location, + StartClass: startClass, + EndClass: endClass, + StartWeek: startWeek, + EndWeek: endWeek, + Weekday: weekDay, + Single: single, + Double: double, + }) + } else { + startWeek := utils.SafeAtoi(weekInfo[0]) + endWeek := utils.SafeAtoi(weekInfo[1]) + startClass := utils.SafeAtoi(classInfo[0]) + endClass := utils.SafeAtoi(classInfo[1]) + removedWeeks := []int{} + + for _, adjustRule := range adjustRules { + // 匹配是否是对应的调课信息 + if adjustRule.OldWeek < startWeek || + adjustRule.OldWeek > endWeek || + adjustRule.OldStartClass != startClass || + adjustRule.OldEndClass != endClass || + adjustRule.OldWeekday != weekDay { + continue + } - scheduleRules = append(scheduleRules, CourseScheduleRule{ - Location: lineData[2], - StartClass: utils.SafeAtoi(classInfo[0]), - EndClass: utils.SafeAtoi(classInfo[1]), - StartWeek: utils.SafeAtoi(weekInfo[0]), - EndWeek: utils.SafeAtoi(weekInfo[1]), - Weekday: utils.SafeAtoi(strings.TrimPrefix(dayInfo[0], "星期")), - Single: !strings.Contains(classBasicInfo[1], "双"), - Double: !strings.Contains(classBasicInfo[1], "单"), - }) + // 记录被去掉的周次 + removedWeeks = append(removedWeeks, adjustRule.OldWeek) + + // 添加新的课程信息 + scheduleRules = append(scheduleRules, CourseScheduleRule{ + Location: adjustRule.NewLocation, + StartClass: adjustRule.NewStartClass, + EndClass: adjustRule.NewEndClass, + StartWeek: adjustRule.NewWeek, + EndWeek: adjustRule.NewWeek, + Weekday: adjustRule.NewWeekday, + Single: true, + Double: true, + Adjust: true, // 调课 + }) + } + + sort.Ints(removedWeeks) + // 去掉被调课的周次 + curStartWeek := startWeek + + for _, removedWeek := range removedWeeks { + if removedWeek == curStartWeek { + curStartWeek++ + + continue + } + + scheduleRules = append(scheduleRules, CourseScheduleRule{ + Location: location, + StartClass: startClass, + EndClass: endClass, + StartWeek: curStartWeek, + EndWeek: removedWeek - 1, + Weekday: weekDay, + Single: single, + Double: double, + Adjust: false, + }) + + curStartWeek = removedWeek + 1 + } + + if curStartWeek <= endWeek { + scheduleRules = append(scheduleRules, CourseScheduleRule{ + Location: location, + StartClass: startClass, + EndClass: endClass, + StartWeek: curStartWeek, + EndWeek: endWeek, + Weekday: weekDay, + Single: single, + Double: double, + Adjust: false, + }) + } + } } // TODO: performance optimization @@ -185,10 +307,8 @@ func (s *Student) GetSemesterCourses(term, viewState, eventValidation string) ([ Teacher: htmlquery.OutputHTML(info[7], false), ScheduleRules: scheduleRules, RawScheduleRules: strings.Join(courseInfo8, "\n"), + RawAdjust: strings.Join(courseInfo11, "\n"), Remark: htmlquery.OutputHTML(info[10], false), - // TODO: 解析调课 - // 06周 星期3:5-6节 调至 09周 星期1:7-8节 旗山西1-206 - Adjust: htmlquery.OutputHTML(info[11], false), }) } diff --git a/model.go b/model.go index 2b0694a..5282cda 100644 --- a/model.go +++ b/model.go @@ -53,8 +53,8 @@ type Course struct { Teacher string `json:"teacher"` // 任课教师 ScheduleRules []CourseScheduleRule `json:"scheduleRules"` // 上课时间地点规则 RawScheduleRules string `json:"rawScheduleRules"` // 上课时间地点(原始文本) + RawAdjust string `json:"rawAdjust"` // 调课信息(原始文本) Remark string `json:"remark"` // 备注 - Adjust string `json:"adjust"` // 调课信息 } type CourseScheduleRule struct { @@ -66,6 +66,20 @@ type CourseScheduleRule struct { Weekday int `json:"weekday"` // 星期几 Single bool `json:"single"` // 单周 (PS: 为啥不用 odd) Double bool `json:"double"` // 双周 (PS: 为啥不用 even) + Adjust bool `json:"adjust"` // 调课 +} + +type CourseAdjustRule struct { + OldWeek int `json:"oldWeek"` // 原-周次 + OldWeekday int `json:"oldWeekday"` // 原-星期几 + OldStartClass int `json:"oldStartClass"` // 原-开始节数 + OldEndClass int `json:"oldEndClass"` // 原-结束节数 + + NewWeek int `json:"newWeek"` // 新-周次 + NewWeekday int `json:"newWeekday"` // 新-星期几 + NewStartClass int `json:"newStartClass"` // 新-开始节数 + NewEndClass int `json:"newEndClass"` // 新-结束节数 + NewLocation string `json:"newLocation"` // 新-上课地点 } type Mark struct {