gabarito tp02

This commit is contained in:
Felipe Domingos 2022-04-08 08:22:32 -03:00
parent 7e10a58dd0
commit 7d3067139b
2 changed files with 645 additions and 0 deletions

311
tps/gabarito/tp02/Film.java Normal file
View File

@ -0,0 +1,311 @@
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
/**
 * A film whose attributes are extracted from a saved HTML page stored under
 * {@code ./filmes/}. The parser is line-oriented and relies on the Portuguese
 * labels found in those pages ("Título original", "Situação", "Idioma original",
 * "Orçamento", "Palavras-chave").
 *
 * @author Thiago de Campos Ribeiro Nolasco
 */
public class Film {
    // Attributes
    private String name;
    private String ogTitle;
    private Date releaseDate;
    private Integer duration;
    private String genre;
    private String ogLanguage;
    private String situation;
    private Float budget;
    private String[] arrKeyWds;

    // Formatter used both to parse the release date and to print it.
    // Kept package-visible and per-instance exactly as before so existing users of the field keep working.
    SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy");

    // Constructors

    /** Creates a film with every attribute unset ({@code null}). */
    public Film() {
        this(null, null, null, null, null, null, null, null);
    }

    /**
     * Creates a film from explicit attribute values; the keyword list starts unset.
     *
     * @param name        displayed title
     * @param ogTitle     original title
     * @param releaseDate release date
     * @param duration    running time in minutes
     * @param genre       genre text
     * @param ogLanguage  original language
     * @param situation   release situation
     * @param budget      budget amount
     */
    public Film(String name, String ogTitle, Date releaseDate, Integer duration, String genre, String ogLanguage, String situation, Float budget) {
        this.name = name;
        this.ogTitle = ogTitle;
        this.releaseDate = releaseDate;
        this.duration = duration;
        this.genre = genre;
        this.ogLanguage = ogLanguage;
        this.situation = situation;
        this.budget = budget;
        this.arrKeyWds = null;
    }

    // Getters and Setters

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getOgTitle() {
        return ogTitle;
    }

    public void setOgTitle(String ogTitle) {
        this.ogTitle = ogTitle;
    }

    public Date getReleaseDate() {
        return releaseDate;
    }

    public void setReleaseDate(Date releaseDate) {
        this.releaseDate = releaseDate;
    }

    public Integer getDuration() {
        return duration;
    }

    public void setDuration(Integer duration) {
        this.duration = duration;
    }

    public String getGenre() {
        return genre;
    }

    public void setGenre(String genre) {
        this.genre = genre;
    }

    public String getOgLanguage() {
        return ogLanguage;
    }

    public void setOgLanguage(String ogLanguage) {
        this.ogLanguage = ogLanguage;
    }

    public String getSituation() {
        return situation;
    }

    public void setSituation(String situation) {
        this.situation = situation;
    }

    public Float getBudget() {
        return budget;
    }

    public void setBudget(Float budget) {
        this.budget = budget;
    }

    public String[] getArrKeyWds() {
        return arrKeyWds;
    }

    public void setArrKeyWds(String[] arrKeyWds) {
        this.arrKeyWds = arrKeyWds;
    }

    /**
     * Returns an independent copy of this film. The mutable release date and the
     * keyword array are duplicated so later changes to one object never show up
     * in the other.
     *
     * @return a new {@code Film} holding the same attribute values
     */
    @Override
    public Film clone() {
        Film cloned = new Film();
        cloned.name = this.name;
        cloned.ogTitle = this.ogTitle;
        cloned.releaseDate = (this.releaseDate == null) ? null : (Date) this.releaseDate.clone();
        cloned.duration = this.duration;
        cloned.genre = this.genre;
        cloned.ogLanguage = this.ogLanguage;
        cloned.situation = this.situation;
        cloned.budget = this.budget;
        cloned.arrKeyWds = (this.arrKeyWds == null) ? null : Arrays.copyOf(this.arrKeyWds, this.arrKeyWds.length);
        return cloned;
    }

    /**
     * Reads the HTML file {@code ./filmes/<fileName>} and fills this film's attributes.
     *
     * @param fileName name of the file inside the {@code ./filmes/} directory
     */
    public void ler(String fileName) {
        // Getting the right path for each read file
        String path = "./filmes/" + fileName;
        // Splits chunks of the read HTML and assigns the value to each attribute
        splittingString(path);
    }

    /**
     * Walks the HTML file line by line, locating each marker in document order and
     * storing the value found next to it. Failures are reported (not thrown):
     * a missing file and an unreadable file print a short message, anything else
     * (bad date, bad number, marker never found) prints a stack trace. Attributes
     * parsed before the failure keep their new values.
     *
     * @param path path of the HTML file to parse
     */
    private void splittingString(String path) {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"))) {
            // Film name: first line after the "h2" that follows "title ott"
            skipTo(reader, "title ott");
            skipTo(reader, "h2");
            this.name = removeTags(readRequired(reader).trim());

            // Film release date: line right after the "release" span
            skipTo(reader, "\"release\"");
            this.releaseDate = sdf.parse(removeTags(readRequired(reader).trim()));

            // Film genre: first link line after the "genres" span
            skipTo(reader, "genres");
            this.genre = removeTags(skipTo(reader, "<a href")).trim();

            // Film duration
            skipTo(reader, "runtime");
            readRequired(reader); // Needed because an empty line was found
            this.duration = hoursToMinutes(readRequired(reader).trim());

            // Film original title (if there is) & situation
            this.ogTitle = this.name;
            String line = readRequired(reader);
            while (!line.contains("Situação</bdi>")) {
                if (line.contains("Título original")) {
                    this.ogTitle = removeTags(line.replace("Título original", " ")).trim();
                }
                line = readRequired(reader);
            }
            this.situation = removeTags(line.replace("Situação", " ")).trim();

            // Film original language
            line = skipTo(reader, "Idioma original</bdi>");
            this.ogLanguage = removeTags(line.replace("Idioma original", " ")).trim();

            // Film budget ("-" means no budget informed)
            line = skipTo(reader, "Orçamento</bdi>");
            String aux = removeTags(line.replace("Orçamento", " ")).trim();
            this.budget = (aux.equals("-")) ? 0.0F : convertBudget(aux);

            // Film key-words: everything from the heading up to the closing </ul>
            skipTo(reader, "Palavras-chave</bdi>");
            StringBuilder keywordBlock = new StringBuilder();
            do {
                keywordBlock.append(readRequired(reader).trim()).append(" ");
            } while (keywordBlock.indexOf("</ul>") < 0);
            String keywordHtml = keywordBlock.toString();
            if (!keywordHtml.contains("Nenhuma palavra-chave foi adicionada")) {
                arrKeyWds = removeTags(keywordHtml).trim().split(" ");
            } else {
                // Do not keep keywords left over from a previous read of this object
                arrKeyWds = null;
            }
        } catch (FileNotFoundException e) {
            System.out.println("File not found");
        } catch (IOException e) {
            System.out.println("File cannot be read");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the next line, failing with a descriptive unchecked exception when the
     * file ends early (instead of a bare NullPointerException).
     */
    private static String readRequired(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        if (line == null) {
            throw new IllegalStateException("Unexpected end of file while parsing film page");
        }
        return line;
    }

    /** Consumes lines until one contains {@code marker} and returns that line. */
    private static String skipTo(BufferedReader reader, String marker) throws IOException {
        String line = readRequired(reader);
        while (!line.contains(marker)) {
            line = readRequired(reader);
        }
        return line;
    }

    /**
     * Removes HTML tags (everything between {@code '<'} and the next {@code '>'})
     * and every {@code &nbsp;} entity from a line.
     *
     * @param line text that may contain HTML tags
     * @return the text outside the tags; an unterminated tag discards the rest of the line
     */
    private String removeTags(String line) {
        StringBuilder resp = new StringBuilder(line.length());
        int i = 0;
        while (i < line.length()) {
            char current = line.charAt(i);
            if (current == '<') {
                int close = line.indexOf('>', i + 1);
                if (close < 0) {
                    break; // tag never closes: nothing after it is plain text
                }
                i = close;
            } else {
                resp.append(current);
            }
            i++;
        }
        // Returning cleaned line
        return resp.toString().replace("&nbsp;", "");
    }

    /**
     * Converts a running time such as {@code "1h 49m"}, {@code "49m"} or
     * {@code "2h"} to minutes.
     *
     * @param value running time text
     * @return total minutes
     * @throws NumberFormatException if no number can be read from {@code value}
     */
    private int hoursToMinutes(String value) {
        int hourMark = value.indexOf('h');
        if (hourMark < 0) {
            // No hour marker: "<hours> <minutes>" or just "<minutes>"
            String[] splitValue = removeLetters(value).split(" ");
            if (splitValue.length > 1) {
                return (60 * Integer.parseInt(splitValue[0])) + Integer.parseInt(splitValue[1]);
            }
            return Integer.parseInt(splitValue[0]);
        }
        // With an hour marker the number before it is always hours, even when
        // no minutes follow (e.g. "2h" is 120 minutes, not 2).
        int hours = Integer.parseInt(removeLetters(value.substring(0, hourMark)).trim());
        String minuteDigits = removeLetters(value.substring(hourMark + 1)).trim();
        int minutes = minuteDigits.isEmpty() ? 0 : Integer.parseInt(minuteDigits);
        return (60 * hours) + minutes;
    }

    /**
     * Keeps only digits, blank spaces and {@code '.'} (ex: 1h 49m = 1 49).
     *
     * @param value text to filter
     * @return the filtered text
     */
    private String removeLetters(String value) {
        StringBuilder result = new StringBuilder(value.length());
        for (int i = 0; i < value.length(); i++) {
            char current = value.charAt(i);
            // '.' is kept because convertBudget needs the decimal separator
            if ((current >= '0' && current <= '9') || current == ' ' || current == '.') {
                result.append(current);
            }
        }
        return result.toString();
    }

    /**
     * Converts a budget text to a float after dropping every character that is
     * not a digit, a space or a {@code '.'} (this removes the {@code ','} separators).
     *
     * @param value budget text
     * @return the parsed amount
     */
    private Float convertBudget(String value) {
        return Float.parseFloat(removeLetters(value));
    }

    /**
     * Formats the film as a single space-separated line; the release date is
     * printed as {@code dd/MM/yyyy} and the keywords as a bracketed list.
     * Unset attributes print as {@code null} and unset keywords as {@code []}.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append(name);
        sb.append(" ").append(ogTitle);
        // A film that was never read has no date; formatting null would throw
        sb.append(" ").append(releaseDate == null ? "null" : sdf.format(releaseDate));
        sb.append(" ").append(duration);
        sb.append(" ").append(genre);
        sb.append(" ").append(ogLanguage);
        sb.append(" ").append(situation);
        sb.append(" ").append(budget);
        sb.append(" ").append(arrKeyWds == null ? "[]" : Arrays.asList(arrKeyWds).toString());
        return sb.toString();
    }

    /** Prints {@link #toString()} followed by a line break to standard output. */
    public void imprimir() {
        System.out.println(this.toString());
    }
}

334
tps/gabarito/tp02/Movie.c Normal file
View File

@ -0,0 +1,334 @@
// -------------------------------------------------------------------------------- //
// @author Pedro Henrique Lopes Costa
// 1/2022
//
// -------------------------------------------------------------------------------- //
// Includes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
// -------------------------------------------------------------------------------- //
// Definitions
// Capacity of the global `movies` array
#define MAX_MOVIES 100
// Size in bytes of every text field of a Movie (and of each keyword slot)
#define MAX_FIELD_SIZE 100
// Number of keyword slots available per Movie
#define MAX_KEYWORDS 20
// Size in bytes of the buffer main() uses for the path read from stdin
#define MAX_LINE_SIZE 250
// Directory prefix main() places in front of each file name read from stdin
#define FDR_PREFIX "/tmp/filmes/"
// -------------------------------------------------------------------------------- //
// Structs
// Calendar date split into its numeric components.
typedef struct {
    int year;
    int month;
    int day;
} Date;
// One parsed movie: fixed-size text fields, the release date, numeric
// attributes and up to MAX_KEYWORDS keywords (count_keywords slots in use).
typedef struct {
    char name[MAX_FIELD_SIZE];
    char original_title[MAX_FIELD_SIZE];
    char genre[MAX_FIELD_SIZE];
    char original_language[MAX_FIELD_SIZE];
    char situation[MAX_FIELD_SIZE];
    char keywords[MAX_KEYWORDS][MAX_FIELD_SIZE];
    Date release_date;
    int duration;
    int count_keywords;
    float budget;
} Movie;
// -------------------------------------------------------------------------------- //
// Global variables
// Storage for every movie read so far; being a global it starts zero-filled
Movie movies[MAX_MOVIES];
// Index of the slot movie_readHtml fills next; main() advances it after printing
int count_movies = 0;
// -------------------------------------------------------------------------------- //
// Functions
// Returns true when `str` begins with the three characters "FIM".
bool isFim(char *str) {
    return strncmp(str, "FIM", 3) == 0;
}
// Cuts `line` at its first '\r' or '\n'. A line that has no line break is left
// unchanged (the scan stops at the terminator instead of running past it).
// Returns a pointer to the resulting end of the string (its '\0'), or NULL
// when `line` is NULL.
char *remove_line_break(char *line) {
    if (line == NULL) return NULL;
    line += strcspn(line, "\r\n");
    *line = '\0';
    return line;
}
// Reads one line from `file` into `line` (at most max_size - 1 characters) and
// strips its line break. Returns what remove_line_break returns for the line,
// or NULL when nothing could be read (end of file or read error); in that
// case `line` is set to the empty string.
char *freadline(char *line, int max_size, FILE *file) {
    if (fgets(line, max_size, file) == NULL) {
        if (max_size > 0) line[0] = '\0';
        return NULL;
    }
    return remove_line_break(line);
}
// Reads one line from standard input; see freadline for the details.
char *readline(char *line, int max_size) {
    char *result = freadline(line, max_size, stdin);
    return result;
}
// Returns the 0-based offset of the first occurrence of the string `search`
// inside `str`, or -1 when it does not occur. An empty `search` yields -1.
long int indexOf(char *str, char *search) {
    if (*search == '\0') return -1;
    // strstr matches the whole needle; strcspn would match any single character of it
    char *hit = strstr(str, search);
    return hit == NULL ? -1 : (long int)(hit - str);
}
// Returns a newly allocated copy of up to `length` characters of `string`,
// starting at the 1-based `position`. The request is clamped to the string:
// a position below 1 starts at the first character, a position past the end
// or a non-positive length yields "", and the copy never reads beyond the
// terminator. The caller owns the result and must free() it.
// Terminates the program when memory cannot be allocated.
char *substring(char *string, int position, int length) {
    size_t available = strlen(string);
    size_t start = position > 1 ? (size_t)(position - 1) : 0;
    size_t count = length > 0 ? (size_t) length : 0;
    char *p;
    if (start > available) start = available;
    if (count > available - start) count = available - start;
    p = malloc(count + 1);
    if(p == NULL) {
        printf("Unable to allocate memory.\n");
        exit(1);
    }
    memcpy(p, string + start, count);
    p[count] = '\0';
    return p;
}
// Replaces, in place, every occurrence of `needle` in `target` with
// `replacement`. The result is built in a heap buffer of the exact size
// needed, so input of any length is handled. An empty `needle` leaves
// `target` unchanged.
// When `replacement` is longer than `needle` the text grows: the caller must
// make sure `target` has room for the result.
// Terminates the program when memory cannot be allocated.
void str_replace(char *target, const char *needle, const char *replacement) {
    size_t needle_len = strlen(needle);
    size_t repl_len = strlen(replacement);
    size_t target_len = strlen(target);
    size_t hits = 0;
    const char *scan;
    if(needle_len == 0) return; // strstr would match forever without advancing
    for(scan = strstr(target, needle); scan != NULL; scan = strstr(scan + needle_len, needle)) hits++;
    if(hits == 0) return;
    size_t result_len = target_len - hits * needle_len + hits * repl_len;
    char *buffer = malloc(result_len + 1);
    if(buffer == NULL) {
        printf("Unable to allocate memory.\n");
        exit(1);
    }
    char *insert_point = buffer;
    const char *tmp = target;
    while(1) {
        const char *p = strstr(tmp, needle);
        if(p == NULL) {
            strcpy(insert_point, tmp);
            break;
        }
        memcpy(insert_point, tmp, p - tmp);
        insert_point += p - tmp;
        memcpy(insert_point, replacement, repl_len);
        insert_point += repl_len;
        tmp = p + needle_len;
    }
    memcpy(target, buffer, result_len + 1);
    free(buffer);
}
// Returns the 0-based index of the first decimal digit of `str` at or after
// `start`, or -1 when there is none. A negative `start` is treated as 0 and a
// `start` beyond the end of the string simply finds nothing.
int firstDigit(const char *str, int start) {
    size_t len = strlen(str); // computed once instead of on every iteration
    if(start < 0) start = 0;
    for(size_t i = (size_t) start; i < len; i++) if(str[i] >= '0' && str[i] <= '9') return (int) i;
    return -1;
}
// Remove tags
// Copies into `text` the characters of `html` that lie outside tags, dropping
// double quotes, line breaks and character entities ("&...;").
// Scanning stops at the first "<p...", "<br", "</h1", "</th" or "</td" tag.
// An '&' with no closing ';' is kept as a plain character.
// `text` must be able to hold strlen(html) + 1 bytes.
// Returns `text`, advanced by one when its first character is a space.
char *extractOnlyText(char *html, char *text) {
    char *start = text;
    int depth = 0; // number of tags currently open ('<' seen, '>' not yet)
    while (*html != '\0') {
        if (*html == '<') {
            if (
                (*(html + 1) == 'p') ||
                (*(html + 1) == 'b' && *(html + 2) == 'r') ||
                (*(html + 1) == '/' && *(html + 2) == 'h' && *(html + 3) == '1') ||
                (*(html + 1) == '/' && *(html + 2) == 't' && *(html + 3) == 'h') ||
                (*(html + 1) == '/' && *(html + 2) == 't' && *(html + 3) == 'd')
            ) break;
            else depth++;
        }
        else if (*html == '>') depth--;
        else if (depth == 0 && *html != '"') {
            if (*html == '&') {
                char *entity_end = strchr(html, ';');
                // Jump to the ';' so the whole entity is skipped; without one
                // there is no entity and the '&' is ordinary text.
                if (entity_end != NULL) html = entity_end;
                else *text++ = *html;
            }
            else if (*html != '\r' && *html != '\n') *text++ = *html;
        }
        html++;
    }
    *text = '\0';
    return *start == ' ' ? start + 1 : start;
}
// -------------------------------------------------------------------------------- //
// Class movie functions
// Prints one movie on a single line: the text and numeric attributes separated
// by spaces, followed by the keywords as a bracketed, comma-separated list.
void movie_print(Movie *movie) {
    const int total = movie->count_keywords;

    printf("%s %s %02i/%02i/%04i %i %s %s %s %g [",
        movie->name,
        movie->original_title,
        movie->release_date.day, movie->release_date.month, movie->release_date.year,
        movie->duration,
        movie->genre,
        movie->original_language,
        movie->situation,
        movie->budget);

    // The last keyword closes the list; every other one is followed by a separator
    for(int k = 0; k < total; k++) {
        printf(k + 1 == total ? "%s]\n" : "%s, ", movie->keywords[k]);
    }

    // With no keywords the loop printed nothing, so close the list here
    if(total == 0) printf("]\n");
}
void movie_readHtml(char *filename) {
FILE *html_file;
char *line_html = NULL;
size_t len = 0;
ssize_t read;
html_file = fopen(filename, "r");
if(html_file == NULL) exit(EXIT_FAILURE);
// ------------------------------------ //
// Creating movie variables
char *name = NULL,
*original_title = NULL,
*genre = NULL,
*original_language = NULL,
*situation = NULL,
*keywords = NULL;
Date release_date;
release_date.day = 0;
int duration = -1;
float budget = -1;
// ------------------------------------ //
// Read HTML line by line
while((read = getline(&line_html, &len, html_file)) != -1) {
// --------------------------- //
// Find movie name
if(name == NULL) {
if(strstr(line_html, "<title>") != NULL) {
name = strstr(line_html, "<title>") + 7;
strcpy(movies[count_movies].name, name);
str_replace(movies[count_movies].name, "&#8212;", "");
movies[count_movies].name[strlen(movies[count_movies].name) - 46] = '\0';
}
}
// --------------------------- //
// Find movie original title
if(original_title == NULL) {
if(strstr(line_html, "<p class=\"wrap\">") != NULL) {
original_title = strstr(line_html, "</strong> ") + 10;
original_title[strlen(original_title) - 5] = '\0';
strcpy(movies[count_movies].original_title, original_title);
}
}
// --------------------------- //
// Find movie release date
if(release_date.day == 0) {
if(strstr(line_html, "<span class=\"release\">") != NULL) {
// Skip one line
read = getline(&line_html, &len, html_file);
char *day, *month, *year;
day = substring(line_html, 9, 2);
month = substring(line_html, 12, 2);
year = substring(line_html, 15, 4);
movies[count_movies].release_date.day = atoi(day);
movies[count_movies].release_date.month = atoi(month);
movies[count_movies].release_date.year = atoi(year);
}
}
// --------------------------- //
// Find movie duration
if(duration == -1) {
if(strstr(line_html, "<span class=\"runtime\">") != NULL) {
// Skip two lines
read = getline(&line_html, &len, html_file);
read = getline(&line_html, &len, html_file);
int h_pos = indexOf(line_html, "h"),
hours = 0,
minutes = 0;
if(h_pos != -1) hours = atoi(substring(line_html, firstDigit(line_html, 0), h_pos));
minutes = atoi(substring(line_html, firstDigit(line_html, h_pos == -1 ? 0 : h_pos), strlen(line_html) - 1));
duration = (hours * 60) + minutes;
movies[count_movies].duration = duration;
}
}
// -------------------------- //
// Find movie genres
if(genre == NULL) {
if(strstr(line_html, "<span class=\"genres\">") != NULL) {
// Skip two lines
read = getline(&line_html, &len, html_file);
read = getline(&line_html, &len, html_file);
extractOnlyText(line_html, movies[count_movies].genre);
genre = substring(movies[count_movies].genre, 7, strlen(movies[count_movies].genre));
strcpy(movies[count_movies].genre, genre);
}
}
// --------------------------- //
// Find movie original language
if(original_language == NULL) {
if(strstr(line_html, "<bdi>Idioma original</bdi>") != NULL) {
strcpy(movies[count_movies].original_language, line_html);
original_language = substring(movies[count_movies].original_language, 50, strlen(line_html) - 54);
strcpy(movies[count_movies].original_language, original_language);
}
}
// --------------------------- //
// Find movie situation
if(situation == NULL) {
if(strstr(line_html, "<bdi>Situação</bdi>") != NULL) {
strcpy(movies[count_movies].situation, line_html);
situation = substring(movies[count_movies].situation, 44, strlen(line_html) - 44);
strcpy(movies[count_movies].situation, situation);
}
}
// --------------------------- //
// Find movie budget
if(budget == -1) {
if(strstr(line_html, "<bdi>Orçamento</bdi>") != NULL) {
char *p_budget, e_budget[strlen(line_html)];
strcpy(e_budget, line_html);
p_budget = substring(e_budget, 45, strlen(line_html) - 49);
if(!strcmp(p_budget, "-")) movies[count_movies].budget = 0;
else{
strcpy(e_budget, p_budget);
str_replace(e_budget, "$", "");
str_replace(e_budget, ",", "");
movies[count_movies].budget = atof(e_budget);
}
}
}
// --------------------------- //
// Find movie keywords
if(keywords == NULL) {
if(strstr(line_html, "<h4><bdi>Palavras-chave</bdi></h4>") != NULL) {
// Skip two lines until keywords starts
for(int i = 0; i < 2; i++) read = getline(&line_html, &len, html_file);
char tmp_line[strlen(line_html)];
strcpy(tmp_line, line_html);
keywords = substring(tmp_line, 5, strlen(line_html) - 5);
if(strcmp(keywords, "<p><bdi>Nenhuma palavra-chave foi adicionada.</bdi></p>")) {
// Skip more two lines until keywords starts
for(int x = 0; x < 2; x++) read = getline(&line_html, &len, html_file);
while(true) {
if(strstr(line_html, "</ul>") != NULL) break;
if(strstr(line_html, "<li>") != NULL){
extractOnlyText(line_html, tmp_line);
keywords = substring(tmp_line, 9, strlen(line_html) - 8);
strcpy(movies[count_movies].keywords[movies[count_movies].count_keywords++], keywords);
}
read = getline(&line_html, &len, html_file);
}
}
}
}
// ------------------------------------ //
// Verify variables still "null"
if(original_title == NULL) strcpy(movies[count_movies].original_title, movies[count_movies].name);
}
// ------------------------------------ //
fclose(html_file);
if(line_html) free(line_html);
}
// -------------------------------------------------------------------------------- //
int main() {
size_t prefix_size = strlen(FDR_PREFIX);
char line[MAX_LINE_SIZE];
strcpy(line, FDR_PREFIX);
readline(line + prefix_size, MAX_LINE_SIZE);
while(!isFim(line + prefix_size)) {
movie_readHtml(line);
movie_print(&movies[count_movies++]);
readline(line + prefix_size, MAX_LINE_SIZE);
}
return EXIT_SUCCESS;
}